Here I am importing the file which contains global DNAm estimates by ELISA-based MethylFlash that are linked to the patient ID.
outfile1 <- here("PFF_Results/PFF_MethylFlash_ExptID_1-751_Combined_2022_05_05.xlsx")
dnam <- read_excel(outfile1, sheet="CalculatedValues")
Match up PM25 data with methylation data
outfile2 <- here("PM25_Data/Final_PFF_fILD_PM_BaselineData_2022_02_12.xlsx")
PM <- read_excel(outfile2)
str(dnam)
## tibble [751 × 12] (S3: tbl_df/tbl/data.frame)
## $ SSID : chr [1:751] "11R4536" "04R1356" "10R4092" "12R5016" ...
## $ sampID : num [1:751] 79012226 79011626 79090026 79014826 79013526 ...
## $ exptID : num [1:751] 1 2 3 4 5 6 7 8 9 10 ...
## $ run_date : POSIXct[1:751], format: "2022-04-06" "2022-04-06" ...
## $ plate : num [1:751] 1 1 1 1 1 1 1 1 1 1 ...
## $ well : chr [1:751] "G2" "A3" "C3" "E3" ...
## $ raw_mean : num [1:751] 0.1109 0.0711 0.1041 0.0822 0.0923 ...
## $ raw_stdev : num [1:751] 0.00545 0.0017 0.01407 0.00325 0.01103 ...
## $ coef_var : num [1:751] 0.0491 0.0239 0.1352 0.0396 0.1195 ...
## $ calc_slope : num [1:751] 0.0935 0.0935 0.0935 0.0935 0.0935 ...
## $ calc_intercept: num [1:751] 0.335 0.335 0.335 0.335 0.335 ...
## $ pct_5mC : num [1:751] 0.0905 0.0592 0.0842 0.0666 0.0742 ...
dnam <- dnam %>% dplyr::select(SSID, sampID, exptID, run_date, plate, pct_5mC)
dnam <- left_join(dnam, PM, by="SSID")
outfile2b <- here("PM25_Data/PFF_fILD_2000_2018_PM25_2021_10_08.xlsx")
PM <- read_excel(outfile2b)
PM <- PM %>% rename("SSID"="ID")
dnam <- inner_join(PM, dnam, by="SSID")
Reorder so “ID” is the first column
dnam <- dnam %>% dplyr::select(ID, everything(.))
dnam <- dnam %>% dplyr::select(!c(PM_date, value))
First we need to convert the PM dataframe into the long rather than the wide format, which will allow us to use it more easily in R’s tidyverse as this is “tidy” formatting.
dnam <- dnam %>%
pivot_longer(cols=c(7:234), names_to="PM_date", names_prefix="PM25_")
dnamx <- dnam
dnamx$PM_date <- gsub("jan", "01-01-20", dnamx$PM_date)
dnamx$PM_date <- gsub("feb", "01-02-20", dnamx$PM_date)
dnamx$PM_date <- gsub("mar", "01-03-20", dnamx$PM_date)
dnamx$PM_date <- gsub("apr", "01-04-20", dnamx$PM_date)
dnamx$PM_date <- gsub("may", "01-05-20", dnamx$PM_date)
dnamx$PM_date <- gsub("jun", "01-06-20", dnamx$PM_date)
dnamx$PM_date <- gsub("jul", "01-07-20", dnamx$PM_date)
dnamx$PM_date <- gsub("aug", "01-08-20", dnamx$PM_date)
dnamx$PM_date <- gsub("sep", "01-09-20", dnamx$PM_date)
dnamx$PM_date <- gsub("oct", "01-10-20", dnamx$PM_date)
dnamx$PM_date <- gsub("nov", "01-11-20", dnamx$PM_date)
dnamx$PM_date <- gsub("dec", "01-12-20", dnamx$PM_date)
dnamx$PM_date <- format(as.Date(dnamx$PM_date, format="%d-%m-%Y"),"%Y-%m-%d")
dnamx$PM_date <- as.Date(dnamx$PM_date)
dnam <- dnamx
Next I need to convert all date columns to proper format
dnam <- dnam %>%
mutate_at(c("run_date", "dx_date", "consent_date", "death_date", "tx_date", "sample_date", "fvc_date", "dlco_date", "censor_date", "deathORtx_date", "DeathTxCensor_date", "PM_date"), as.Date)
str(dnam)
## tibble [171,228 × 63] (S3: tbl_df/tbl/data.frame)
## $ ID : num [1:171228] 513 513 513 513 513 513 513 513 513 513 ...
## $ nrow.x : num [1:171228] 27492333 27492333 27492333 27492333 27492333 ...
## $ dist.x : num [1:171228] 0.00707 0.00707 0.00707 0.00707 0.00707 ...
## $ SSID : chr [1:171228] "02R0456" "02R0456" "02R0456" "02R0456" ...
## $ lon : num [1:171228] -123 -123 -123 -123 -123 ...
## $ lat : num [1:171228] 38.4 38.4 38.4 38.4 38.4 ...
## $ sampID : num [1:171228] 77900524 77900524 77900524 77900524 77900524 ...
## $ exptID : num [1:171228] 682 682 682 682 682 682 682 682 682 682 ...
## $ run_date : Date[1:171228], format: "2022-04-25" "2022-04-25" ...
## $ plate : num [1:171228] 17 17 17 17 17 17 17 17 17 17 ...
## $ pct_5mC : num [1:171228] 0.105 0.105 0.105 0.105 0.105 ...
## $ sex : chr [1:171228] "Male" "Male" "Male" "Male" ...
## $ race : chr [1:171228] "W" "W" "W" "W" ...
## $ dich_Race : chr [1:171228] "White" "White" "White" "White" ...
## $ ethnicity : chr [1:171228] "N" "N" "N" "N" ...
## $ smokeHx : chr [1:171228] "Ever" "Ever" "Ever" "Ever" ...
## $ age_dx : num [1:171228] 74 74 74 74 74 ...
## $ status : chr [1:171228] "1" "1" "1" "1" ...
## $ deadORtx : num [1:171228] 1 1 1 1 1 1 1 1 1 1 ...
## $ dx : chr [1:171228] "IPF" "IPF" "IPF" "IPF" ...
## $ dx_group : chr [1:171228] "IPF" "IPF" "IPF" "IPF" ...
## $ dx_date : Date[1:171228], format: "2016-05-20" "2016-05-20" ...
## $ consent_date : Date[1:171228], format: "2016-05-19" "2016-05-19" ...
## $ censor_date : Date[1:171228], format: "2016-06-11" "2016-06-11" ...
## $ tx_date : Date[1:171228], format: NA NA ...
## $ death_date : Date[1:171228], format: "2016-06-11" "2016-06-11" ...
## $ deathORtx_date : Date[1:171228], format: "2016-06-11" "2016-06-11" ...
## $ DeathTxCensor_date: Date[1:171228], format: "2016-06-11" "2016-06-11" ...
## $ sample_date : Date[1:171228], format: "2016-05-19" "2016-05-19" ...
## $ fvc_date : Date[1:171228], format: "2016-03-17" "2016-03-17" ...
## $ dlco_date : Date[1:171228], format: "2016-03-17" "2016-03-17" ...
## $ fvc_pct : num [1:171228] 58.5 58.5 58.5 58.5 58.5 ...
## $ dlco_pct : num [1:171228] 28.8 28.8 28.8 28.8 28.8 ...
## $ fvc_timefromdx : num [1:171228] -0.175 -0.175 -0.175 -0.175 -0.175 ...
## $ dlco_timefromdx : num [1:171228] -0.175 -0.175 -0.175 -0.175 -0.175 ...
## $ Reason_Termination: chr [1:171228] "Death" "Death" "Death" "Death" ...
## $ Death_ILD_Related : chr [1:171228] "Yes" "Yes" "Yes" "Yes" ...
## $ same_zip : logi [1:171228] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ zip : chr [1:171228] "95405" "95405" "95405" "95405" ...
## $ zip_new : chr [1:171228] "95405" "95405" "95405" "95405" ...
## $ lat.x : num [1:171228] 38.4 38.4 38.4 38.4 38.4 ...
## $ lon.x : num [1:171228] -123 -123 -123 -123 -123 ...
## $ state : chr [1:171228] "CA" "CA" "CA" "CA" ...
## $ major_city : chr [1:171228] "Santa Rosa" "Santa Rosa" "Santa Rosa" "Santa Rosa" ...
## $ time_censoring : num [1:171228] 0.063 0.063 0.063 0.063 0.063 ...
## $ time_death : num [1:171228] 0.063 0.063 0.063 0.063 0.063 ...
## $ time_tx : num [1:171228] NA NA NA NA NA NA NA NA NA NA ...
## $ time_deathORtx : num [1:171228] 0.063 0.063 0.063 0.063 0.063 ...
## $ time_DeathTxCensor: num [1:171228] 0.063 0.063 0.063 0.063 0.063 ...
## $ nrow.y : num [1:171228] 27492333 27492333 27492333 27492333 27492333 ...
## $ dist.y : num [1:171228] 0.00707 0.00707 0.00707 0.00707 0.00707 ...
## $ lon.y : num [1:171228] -123 -123 -123 -123 -123 ...
## $ lat.y : num [1:171228] 38.4 38.4 38.4 38.4 38.4 ...
## $ PM_5yrPreCensor : num [1:171228] 7.62 7.62 7.62 7.62 7.62 ...
## $ PM_5yrPreDx : num [1:171228] 7.62 7.62 7.62 7.62 7.62 ...
## $ PM5yrCensor_dich : chr [1:171228] "Low" "Low" "Low" "Low" ...
## $ PM5yr_dich : chr [1:171228] "Low" "Low" "Low" "Low" ...
## $ dx_IPF : chr [1:171228] "IPF" "IPF" "IPF" "IPF" ...
## $ ruca : num [1:171228] 1 1 1 1 1 1 1 1 1 1 ...
## $ metro : chr [1:171228] "metropolitan" "metropolitan" "metropolitan" "metropolitan" ...
## $ site : chr [1:171228] "02R" "02R" "02R" "02R" ...
## $ PM_date : Date[1:171228], format: "2000-01-01" "2000-02-01" ...
## $ value : num [1:171228] 13 8 6.2 6.5 4.7 ...
dnam <- dnam %>% dplyr::select(!c("nrow.x", "dist.x", "lon", "lat", "lat.x", "lon.x", "nrow.y", "dist.y", "lon.y", "lat.y"))
Now I’m creating new variables where I am matching up PM averages per year to years of major events for patients (year of diagnosis, year of death/lung transplant/censoring, etc)
Here I am calculating the average PM value in the 5yrs prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(PM_5yrPreSamp = mean(value[ymd(PM_date)>=(ymd(sample_date) - years(5)) &
ymd(PM_date)<=ymd(sample_date)]))
Here I am calculating the average PM value in the 1yr prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(PM_1yrPreSamp = mean(value[ymd(PM_date)>=(ymd(sample_date) - years(1)) &
ymd(PM_date)<=ymd(sample_date)]))
Here I am calculating the average PM value in the 6mo prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(PM_6moPreSamp = mean(value[ymd(PM_date)>=(ymd(sample_date) - months(6)) &
ymd(PM_date)<=ymd(sample_date)]))
Here I am calculating the average PM value in the 3mo prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(PM_3moPreSamp = mean(value[ymd(PM_date)>=(ymd(sample_date) - months(3)) &
ymd(PM_date)<=ymd(sample_date)]))
Here I am calculating the average PM value in the 1mo prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(PM_1moPreSamp = mean(value[ymd(PM_date)>=(ymd(sample_date) - months(2)) &
ymd(PM_date)<=ymd(sample_date)]))
Of note, the 1mo pre-sampling with this method actually provides the average PM2.5 value for the month of sampling and the 1mo prior to sampling because the sampling could have occurred at the beginning or end of a month.
rm(list=c("dnamx"))
Now that we have calculated the patient-specific PM exposures, I can get rid of all rows other than the first row for each patient.
dnam <- dnam %>%
distinct_at(vars(ID), .keep_all=T)
This takes us down to 734 observations.
dnam <- dnam %>% dplyr::select(!value)
dnam <- dnam %>% filter(!str_detect(SSID, "^08R"))
This takes us down to 733 observations after we remove UPitt patients which may be doubled up in our Simmons analyses.
outfile3 <- here("PM25_Data/PFF_fILD_2000_2017_SO4_2021_11_05.xlsx")
SO4 <- read_excel(outfile3)
dnam <- inner_join(SO4, dnam, by="ID")
Reorder so “ID” is the first column
dnam <- dnam %>% dplyr::select(ID, everything(.))
First we need to convert the PM dataframe into the long rather than the wide format, which will allow us to use it more easily in R’s tidyverse as this is “tidy” formatting.
dnam <- dnam %>%
pivot_longer(cols=c(6:221), names_to="SO4_date", names_prefix="SO4_", names_repair = "minimal")
dnamx <- dnam
dnamx$SO4_date <- gsub("jan", "01-01-20", dnamx$SO4_date)
dnamx$SO4_date <- gsub("feb", "01-02-20", dnamx$SO4_date)
dnamx$SO4_date <- gsub("mar", "01-03-20", dnamx$SO4_date)
dnamx$SO4_date <- gsub("apr", "01-04-20", dnamx$SO4_date)
dnamx$SO4_date <- gsub("may", "01-05-20", dnamx$SO4_date)
dnamx$SO4_date <- gsub("jun", "01-06-20", dnamx$SO4_date)
dnamx$SO4_date <- gsub("jul", "01-07-20", dnamx$SO4_date)
dnamx$SO4_date <- gsub("aug", "01-08-20", dnamx$SO4_date)
dnamx$SO4_date <- gsub("sep", "01-09-20", dnamx$SO4_date)
dnamx$SO4_date <- gsub("oct", "01-10-20", dnamx$SO4_date)
dnamx$SO4_date <- gsub("nov", "01-11-20", dnamx$SO4_date)
dnamx$SO4_date <- gsub("dec", "01-12-20", dnamx$SO4_date)
dnamx$SO4_date <- format(as.Date(dnamx$SO4_date, format="%d-%m-%Y"),"%Y-%m-%d")
dnamx$SO4_date <- as.Date(dnamx$SO4_date)
dnam <- dnamx
Next I need to convert all date columns to proper format
dnam <- dnam %>%
mutate_at(c("SO4_date"), as.Date)
str(dnam)
## tibble [161,136 × 63] (S3: tbl_df/tbl/data.frame)
## $ ID : num [1:161136] 513 513 513 513 513 513 513 513 513 513 ...
## $ nrow : num [1:161136] 27492333 27492333 27492333 27492333 27492333 ...
## $ dist : num [1:161136] 0.00707 0.00707 0.00707 0.00707 0.00707 ...
## $ lon : num [1:161136] -123 -123 -123 -123 -123 ...
## $ lat : num [1:161136] 38.4 38.4 38.4 38.4 38.4 ...
## $ SSID : chr [1:161136] "02R0456" "02R0456" "02R0456" "02R0456" ...
## $ sampID : num [1:161136] 77900524 77900524 77900524 77900524 77900524 ...
## $ exptID : num [1:161136] 682 682 682 682 682 682 682 682 682 682 ...
## $ run_date : Date[1:161136], format: "2022-04-25" "2022-04-25" ...
## $ plate : num [1:161136] 17 17 17 17 17 17 17 17 17 17 ...
## $ pct_5mC : num [1:161136] 0.105 0.105 0.105 0.105 0.105 ...
## $ sex : chr [1:161136] "Male" "Male" "Male" "Male" ...
## $ race : chr [1:161136] "W" "W" "W" "W" ...
## $ dich_Race : chr [1:161136] "White" "White" "White" "White" ...
## $ ethnicity : chr [1:161136] "N" "N" "N" "N" ...
## $ smokeHx : chr [1:161136] "Ever" "Ever" "Ever" "Ever" ...
## $ age_dx : num [1:161136] 74 74 74 74 74 ...
## $ status : chr [1:161136] "1" "1" "1" "1" ...
## $ deadORtx : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
## $ dx : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
## $ dx_group : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
## $ dx_date : Date[1:161136], format: "2016-05-20" "2016-05-20" ...
## $ consent_date : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
## $ censor_date : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ tx_date : Date[1:161136], format: NA NA ...
## $ death_date : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ deathORtx_date : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ DeathTxCensor_date: Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ sample_date : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
## $ fvc_date : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
## $ dlco_date : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
## $ fvc_pct : num [1:161136] 58.5 58.5 58.5 58.5 58.5 ...
## $ dlco_pct : num [1:161136] 28.8 28.8 28.8 28.8 28.8 ...
## $ fvc_timefromdx : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
## $ dlco_timefromdx : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
## $ Reason_Termination: chr [1:161136] "Death" "Death" "Death" "Death" ...
## $ Death_ILD_Related : chr [1:161136] "Yes" "Yes" "Yes" "Yes" ...
## $ same_zip : logi [1:161136] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ zip : chr [1:161136] "95405" "95405" "95405" "95405" ...
## $ zip_new : chr [1:161136] "95405" "95405" "95405" "95405" ...
## $ state : chr [1:161136] "CA" "CA" "CA" "CA" ...
## $ major_city : chr [1:161136] "Santa Rosa" "Santa Rosa" "Santa Rosa" "Santa Rosa" ...
## $ time_censoring : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ time_death : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ time_tx : num [1:161136] NA NA NA NA NA NA NA NA NA NA ...
## $ time_deathORtx : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ time_DeathTxCensor: num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ PM_5yrPreCensor : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
## $ PM_5yrPreDx : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
## $ PM5yrCensor_dich : chr [1:161136] "Low" "Low" "Low" "Low" ...
## $ PM5yr_dich : chr [1:161136] "Low" "Low" "Low" "Low" ...
## $ dx_IPF : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
## $ ruca : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
## $ metro : chr [1:161136] "metropolitan" "metropolitan" "metropolitan" "metropolitan" ...
## $ site : chr [1:161136] "02R" "02R" "02R" "02R" ...
## $ PM_date : Date[1:161136], format: "2000-01-01" "2000-01-01" ...
## $ PM_5yrPreSamp : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
## $ PM_1yrPreSamp : num [1:161136] 6.31 6.31 6.31 6.31 6.31 ...
## $ PM_6moPreSamp : num [1:161136] 6.6 6.6 6.6 6.6 6.6 ...
## $ PM_3moPreSamp : num [1:161136] 5.63 5.63 5.63 5.63 5.63 ...
## $ PM_1moPreSamp : num [1:161136] 6.35 6.35 6.35 6.35 6.35 ...
## $ SO4_date : Date[1:161136], format: "2000-01-01" "2000-02-01" ...
## $ value : num [1:161136] 1.3 0.3 0.5 0.6 0.9 ...
Now I’m creating new variables where I am matching up PM averages per year to years of major events for patients (year of diagnosis, year of death/lung transplant/censoring, etc)
Here I am calculating the average SO4 value in the 5yrs prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(SO4_5yrPreSamp = mean(value[ymd(SO4_date)>=(ymd(sample_date) - years(5)) &
ymd(SO4_date)<=ymd(sample_date)]))
Here I am calculating the average SO4 value in the 1yr prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(SO4_1yrPreSamp = mean(value[ymd(SO4_date)>=(ymd(sample_date) - years(1)) &
ymd(SO4_date)<=ymd(sample_date)]))
Here I am calculating the average SO4 value in the 6mo prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(SO4_6moPreSamp = mean(value[ymd(SO4_date)>=(ymd(sample_date) - months(6)) &
ymd(SO4_date)<=ymd(sample_date)]))
Here I am calculating the average SO4 value in the 3mo prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(SO4_3moPreSamp = mean(value[ymd(SO4_date)>=(ymd(sample_date) - months(3)) &
ymd(SO4_date)<=ymd(sample_date)]))
Here I am calculating the average SO4 value in the 1mo prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(SO4_1moPreSamp = mean(value[ymd(SO4_date)>=(ymd(sample_date) - months(2)) &
ymd(SO4_date)<=ymd(sample_date)]))
Of note, the 1mo pre-sampling with this method actually provides the average PM2.5 value for the month of sampling and the 1mo prior to sampling because the sampling could have occurred at the beginning or end of a month.
rm(list=c("dnamx", "PM", "SO4"))
Now that we have calculated the patient-specific PM exposures, I can get rid of all rows other than the first row for each patient.
dnam <- dnam %>%
distinct_at(vars(ID), .keep_all=T)
This takes us down to 733 observations.
dnam <- dnam %>% dplyr::select(!c(nrow, dist, lon, lat, SO4_date, value))
outfile3 <- here("PM25_Data/PFF_fILD_2000_2017_NO3_2021_11_05.xlsx")
NO3 <- read_excel(outfile3)
dnam <- inner_join(NO3, dnam, by="ID")
Reorder so “ID” is the first column
dnam <- dnam %>% dplyr::select(ID, everything(.))
First we need to convert the PM dataframe into the long rather than the wide format, which will allow us to use it more easily in R’s tidyverse as this is “tidy” formatting.
dnam <- dnam %>%
pivot_longer(cols=c(6:221), names_to="NO3_date", names_prefix="NIT_", names_repair = "minimal")
dnamx <- dnam
dnamx$NO3_date <- gsub("jan", "01-01-20", dnamx$NO3_date)
dnamx$NO3_date <- gsub("feb", "01-02-20", dnamx$NO3_date)
dnamx$NO3_date <- gsub("mar", "01-03-20", dnamx$NO3_date)
dnamx$NO3_date <- gsub("apr", "01-04-20", dnamx$NO3_date)
dnamx$NO3_date <- gsub("may", "01-05-20", dnamx$NO3_date)
dnamx$NO3_date <- gsub("jun", "01-06-20", dnamx$NO3_date)
dnamx$NO3_date <- gsub("jul", "01-07-20", dnamx$NO3_date)
dnamx$NO3_date <- gsub("aug", "01-08-20", dnamx$NO3_date)
dnamx$NO3_date <- gsub("sep", "01-09-20", dnamx$NO3_date)
dnamx$NO3_date <- gsub("oct", "01-10-20", dnamx$NO3_date)
dnamx$NO3_date <- gsub("nov", "01-11-20", dnamx$NO3_date)
dnamx$NO3_date <- gsub("dec", "01-12-20", dnamx$NO3_date)
dnamx$NO3_date <- format(as.Date(dnamx$NO3_date, format="%d-%m-%Y"),"%Y-%m-%d")
dnamx$NO3_date <- as.Date(dnamx$NO3_date)
dnam <- dnamx
Next I need to convert all date columns to proper format
dnam <- dnam %>%
mutate_at(c("NO3_date"), as.Date)
str(dnam)
## tibble [161,136 × 68] (S3: tbl_df/tbl/data.frame)
## $ ID : num [1:161136] 513 513 513 513 513 513 513 513 513 513 ...
## $ nrow : num [1:161136] 27492333 27492333 27492333 27492333 27492333 ...
## $ dist : num [1:161136] 0.00707 0.00707 0.00707 0.00707 0.00707 ...
## $ lon : num [1:161136] -123 -123 -123 -123 -123 ...
## $ lat : num [1:161136] 38.4 38.4 38.4 38.4 38.4 ...
## $ SSID : chr [1:161136] "02R0456" "02R0456" "02R0456" "02R0456" ...
## $ sampID : num [1:161136] 77900524 77900524 77900524 77900524 77900524 ...
## $ exptID : num [1:161136] 682 682 682 682 682 682 682 682 682 682 ...
## $ run_date : Date[1:161136], format: "2022-04-25" "2022-04-25" ...
## $ plate : num [1:161136] 17 17 17 17 17 17 17 17 17 17 ...
## $ pct_5mC : num [1:161136] 0.105 0.105 0.105 0.105 0.105 ...
## $ sex : chr [1:161136] "Male" "Male" "Male" "Male" ...
## $ race : chr [1:161136] "W" "W" "W" "W" ...
## $ dich_Race : chr [1:161136] "White" "White" "White" "White" ...
## $ ethnicity : chr [1:161136] "N" "N" "N" "N" ...
## $ smokeHx : chr [1:161136] "Ever" "Ever" "Ever" "Ever" ...
## $ age_dx : num [1:161136] 74 74 74 74 74 ...
## $ status : chr [1:161136] "1" "1" "1" "1" ...
## $ deadORtx : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
## $ dx : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
## $ dx_group : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
## $ dx_date : Date[1:161136], format: "2016-05-20" "2016-05-20" ...
## $ consent_date : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
## $ censor_date : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ tx_date : Date[1:161136], format: NA NA ...
## $ death_date : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ deathORtx_date : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ DeathTxCensor_date: Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ sample_date : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
## $ fvc_date : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
## $ dlco_date : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
## $ fvc_pct : num [1:161136] 58.5 58.5 58.5 58.5 58.5 ...
## $ dlco_pct : num [1:161136] 28.8 28.8 28.8 28.8 28.8 ...
## $ fvc_timefromdx : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
## $ dlco_timefromdx : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
## $ Reason_Termination: chr [1:161136] "Death" "Death" "Death" "Death" ...
## $ Death_ILD_Related : chr [1:161136] "Yes" "Yes" "Yes" "Yes" ...
## $ same_zip : logi [1:161136] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ zip : chr [1:161136] "95405" "95405" "95405" "95405" ...
## $ zip_new : chr [1:161136] "95405" "95405" "95405" "95405" ...
## $ state : chr [1:161136] "CA" "CA" "CA" "CA" ...
## $ major_city : chr [1:161136] "Santa Rosa" "Santa Rosa" "Santa Rosa" "Santa Rosa" ...
## $ time_censoring : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ time_death : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ time_tx : num [1:161136] NA NA NA NA NA NA NA NA NA NA ...
## $ time_deathORtx : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ time_DeathTxCensor: num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ PM_5yrPreCensor : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
## $ PM_5yrPreDx : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
## $ PM5yrCensor_dich : chr [1:161136] "Low" "Low" "Low" "Low" ...
## $ PM5yr_dich : chr [1:161136] "Low" "Low" "Low" "Low" ...
## $ dx_IPF : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
## $ ruca : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
## $ metro : chr [1:161136] "metropolitan" "metropolitan" "metropolitan" "metropolitan" ...
## $ site : chr [1:161136] "02R" "02R" "02R" "02R" ...
## $ PM_date : Date[1:161136], format: "2000-01-01" "2000-01-01" ...
## $ PM_5yrPreSamp : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
## $ PM_1yrPreSamp : num [1:161136] 6.31 6.31 6.31 6.31 6.31 ...
## $ PM_6moPreSamp : num [1:161136] 6.6 6.6 6.6 6.6 6.6 ...
## $ PM_3moPreSamp : num [1:161136] 5.63 5.63 5.63 5.63 5.63 ...
## $ PM_1moPreSamp : num [1:161136] 6.35 6.35 6.35 6.35 6.35 ...
## $ SO4_5yrPreSamp : num [1:161136] 0.673 0.673 0.673 0.673 0.673 ...
## $ SO4_1yrPreSamp : num [1:161136] 0.475 0.475 0.475 0.475 0.475 ...
## $ SO4_6moPreSamp : num [1:161136] 0.417 0.417 0.417 0.417 0.417 ...
## $ SO4_3moPreSamp : num [1:161136] 0.567 0.567 0.567 0.567 0.567 ...
## $ SO4_1moPreSamp : num [1:161136] 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 ...
## $ NO3_date : Date[1:161136], format: "2000-01-01" "2000-02-01" ...
## $ value : num [1:161136] 5 1.2 1.7 0.7 0.3 ...
Now I’m creating new variables where I am matching up PM averages per year to years of major events for patients (year of diagnosis, year of death/lung transplant/censoring, etc)
Here I am calculating the average NO3 value in the 5yrs prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(NO3_5yrPreSamp = mean(value[ymd(NO3_date)>=(ymd(sample_date) - years(5)) &
ymd(NO3_date)<=ymd(sample_date)]))
Here I am calculating the average NO3 value in the 1yr prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(NO3_1yrPreSamp = mean(value[ymd(NO3_date)>=(ymd(sample_date) - years(1)) &
ymd(NO3_date)<=ymd(sample_date)]))
Here I am calculating the average NO3 value in the 6mo prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(NO3_6moPreSamp = mean(value[ymd(NO3_date)>=(ymd(sample_date) - months(6)) &
ymd(NO3_date)<=ymd(sample_date)]))
Here I am calculating the average NO3 value in the 3mo prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(NO3_3moPreSamp = mean(value[ymd(NO3_date)>=(ymd(sample_date) - months(3)) &
ymd(NO3_date)<=ymd(sample_date)]))
Here I am calculating the average NO3 value in the 1mo prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(NO3_1moPreSamp = mean(value[ymd(NO3_date)>=(ymd(sample_date) - months(2)) &
ymd(NO3_date)<=ymd(sample_date)]))
Of note, the 1mo pre-sampling with this method actually provides the average PM2.5 value for the month of sampling and the 1mo prior to sampling because the sampling could have occurred at the beginning or end of a month.
rm(list=c("dnamx", "NO3"))
dnam <- dnam %>% dplyr::select(!c(nrow, dist, lon, lat, value, NO3_date))
Now that we have calculated the patient-specific PM exposures, I can get rid of all rows other than the first row for each patient.
dnam <- dnam %>%
distinct_at(vars(ID), .keep_all=T)
This takes us down to 733 observations.
outfile3 <- here("PM25_Data/PFF_fILD_2000_2017_NH4_2021_11_05.xlsx")
NH4 <- read_excel(outfile3)
dnam <- inner_join(NH4, dnam, by="ID")
Reorder so “ID” is the first column
dnam <- dnam %>% dplyr::select(ID, everything(.))
First we need to convert the PM dataframe into the long rather than the wide format, which will allow us to use it more easily in R’s tidyverse as this is “tidy” formatting.
dnam <- dnam %>%
pivot_longer(cols=c(6:221), names_to="NH4_date", names_prefix="NH4_", names_repair = "minimal")
dnamx <- dnam
dnamx$NH4_date <- gsub("jan", "01-01-20", dnamx$NH4_date)
dnamx$NH4_date <- gsub("feb", "01-02-20", dnamx$NH4_date)
dnamx$NH4_date <- gsub("mar", "01-03-20", dnamx$NH4_date)
dnamx$NH4_date <- gsub("apr", "01-04-20", dnamx$NH4_date)
dnamx$NH4_date <- gsub("may", "01-05-20", dnamx$NH4_date)
dnamx$NH4_date <- gsub("jun", "01-06-20", dnamx$NH4_date)
dnamx$NH4_date <- gsub("jul", "01-07-20", dnamx$NH4_date)
dnamx$NH4_date <- gsub("aug", "01-08-20", dnamx$NH4_date)
dnamx$NH4_date <- gsub("sep", "01-09-20", dnamx$NH4_date)
dnamx$NH4_date <- gsub("oct", "01-10-20", dnamx$NH4_date)
dnamx$NH4_date <- gsub("nov", "01-11-20", dnamx$NH4_date)
dnamx$NH4_date <- gsub("dec", "01-12-20", dnamx$NH4_date)
dnamx$NH4_date <- format(as.Date(dnamx$NH4_date, format="%d-%m-%Y"),"%Y-%m-%d")
dnamx$NH4_date <- as.Date(dnamx$NH4_date)
dnam <- dnamx
Next I need to convert all date columns to proper format
dnam <- dnam %>%
mutate_at(c("NH4_date"), as.Date)
str(dnam)
## tibble [161,136 × 73] (S3: tbl_df/tbl/data.frame)
## $ ID : num [1:161136] 513 513 513 513 513 513 513 513 513 513 ...
## $ nrow : num [1:161136] 27492333 27492333 27492333 27492333 27492333 ...
## $ dist : num [1:161136] 0.00707 0.00707 0.00707 0.00707 0.00707 ...
## $ lon : num [1:161136] -123 -123 -123 -123 -123 ...
## $ lat : num [1:161136] 38.4 38.4 38.4 38.4 38.4 ...
## $ SSID : chr [1:161136] "02R0456" "02R0456" "02R0456" "02R0456" ...
## $ sampID : num [1:161136] 77900524 77900524 77900524 77900524 77900524 ...
## $ exptID : num [1:161136] 682 682 682 682 682 682 682 682 682 682 ...
## $ run_date : Date[1:161136], format: "2022-04-25" "2022-04-25" ...
## $ plate : num [1:161136] 17 17 17 17 17 17 17 17 17 17 ...
## $ pct_5mC : num [1:161136] 0.105 0.105 0.105 0.105 0.105 ...
## $ sex : chr [1:161136] "Male" "Male" "Male" "Male" ...
## $ race : chr [1:161136] "W" "W" "W" "W" ...
## $ dich_Race : chr [1:161136] "White" "White" "White" "White" ...
## $ ethnicity : chr [1:161136] "N" "N" "N" "N" ...
## $ smokeHx : chr [1:161136] "Ever" "Ever" "Ever" "Ever" ...
## $ age_dx : num [1:161136] 74 74 74 74 74 ...
## $ status : chr [1:161136] "1" "1" "1" "1" ...
## $ deadORtx : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
## $ dx : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
## $ dx_group : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
## $ dx_date : Date[1:161136], format: "2016-05-20" "2016-05-20" ...
## $ consent_date : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
## $ censor_date : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ tx_date : Date[1:161136], format: NA NA ...
## $ death_date : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ deathORtx_date : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ DeathTxCensor_date: Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ sample_date : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
## $ fvc_date : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
## $ dlco_date : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
## $ fvc_pct : num [1:161136] 58.5 58.5 58.5 58.5 58.5 ...
## $ dlco_pct : num [1:161136] 28.8 28.8 28.8 28.8 28.8 ...
## $ fvc_timefromdx : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
## $ dlco_timefromdx : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
## $ Reason_Termination: chr [1:161136] "Death" "Death" "Death" "Death" ...
## $ Death_ILD_Related : chr [1:161136] "Yes" "Yes" "Yes" "Yes" ...
## $ same_zip : logi [1:161136] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ zip : chr [1:161136] "95405" "95405" "95405" "95405" ...
## $ zip_new : chr [1:161136] "95405" "95405" "95405" "95405" ...
## $ state : chr [1:161136] "CA" "CA" "CA" "CA" ...
## $ major_city : chr [1:161136] "Santa Rosa" "Santa Rosa" "Santa Rosa" "Santa Rosa" ...
## $ time_censoring : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ time_death : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ time_tx : num [1:161136] NA NA NA NA NA NA NA NA NA NA ...
## $ time_deathORtx : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ time_DeathTxCensor: num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ PM_5yrPreCensor : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
## $ PM_5yrPreDx : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
## $ PM5yrCensor_dich : chr [1:161136] "Low" "Low" "Low" "Low" ...
## $ PM5yr_dich : chr [1:161136] "Low" "Low" "Low" "Low" ...
## $ dx_IPF : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
## $ ruca : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
## $ metro : chr [1:161136] "metropolitan" "metropolitan" "metropolitan" "metropolitan" ...
## $ site : chr [1:161136] "02R" "02R" "02R" "02R" ...
## $ PM_date : Date[1:161136], format: "2000-01-01" "2000-01-01" ...
## $ PM_5yrPreSamp : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
## $ PM_1yrPreSamp : num [1:161136] 6.31 6.31 6.31 6.31 6.31 ...
## $ PM_6moPreSamp : num [1:161136] 6.6 6.6 6.6 6.6 6.6 ...
## $ PM_3moPreSamp : num [1:161136] 5.63 5.63 5.63 5.63 5.63 ...
## $ PM_1moPreSamp : num [1:161136] 6.35 6.35 6.35 6.35 6.35 ...
## $ SO4_5yrPreSamp : num [1:161136] 0.673 0.673 0.673 0.673 0.673 ...
## $ SO4_1yrPreSamp : num [1:161136] 0.475 0.475 0.475 0.475 0.475 ...
## $ SO4_6moPreSamp : num [1:161136] 0.417 0.417 0.417 0.417 0.417 ...
## $ SO4_3moPreSamp : num [1:161136] 0.567 0.567 0.567 0.567 0.567 ...
## $ SO4_1moPreSamp : num [1:161136] 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 ...
## $ NO3_5yrPreSamp : num [1:161136] 0.923 0.923 0.923 0.923 0.923 ...
## $ NO3_1yrPreSamp : num [1:161136] 0.8 0.8 0.8 0.8 0.8 ...
## $ NO3_6moPreSamp : num [1:161136] 0.883 0.883 0.883 0.883 0.883 ...
## $ NO3_3moPreSamp : num [1:161136] 0.533 0.533 0.533 0.533 0.533 ...
## $ NO3_1moPreSamp : num [1:161136] 0.6 0.6 0.6 0.6 0.6 ...
## $ NH4_date : Date[1:161136], format: "2000-01-01" "2000-02-01" ...
## $ value : num [1:161136] 0.4 0 0.1 0.1 0.1 ...
Now I’m creating new variables where I am matching up PM averages per year to years of major events for patients (year of diagnosis, year of death/lung transplant/censoring, etc)
Here I am calculating the average NH4 value in the 5yrs prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(NH4_5yrPreSamp = mean(value[ymd(NH4_date)>=(ymd(sample_date) - years(5)) &
ymd(NH4_date)<=ymd(sample_date)]))
Here I am calculating the average NH4 value in the 1yr prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(NH4_1yrPreSamp = mean(value[ymd(NH4_date)>=(ymd(sample_date) - years(1)) &
ymd(NH4_date)<=ymd(sample_date)]))
Here I am calculating the average NH4 value in the 6mo prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(NH4_6moPreSamp = mean(value[ymd(NH4_date)>=(ymd(sample_date) - months(6)) &
ymd(NH4_date)<=ymd(sample_date)]))
Here I am calculating the average NH4 value in the 3mo prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(NH4_3moPreSamp = mean(value[ymd(NH4_date)>=(ymd(sample_date) - months(3)) &
ymd(NH4_date)<=ymd(sample_date)]))
Here I am calculating the average NH4 value in the 1mo prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(NH4_1moPreSamp = mean(value[ymd(NH4_date)>=(ymd(sample_date) - months(2)) &
ymd(NH4_date)<=ymd(sample_date)]))
Of note, the 1mo pre-sampling with this method actually provides the average PM2.5 value for the month of sampling and the 1mo prior to sampling because the sampling could have occurred at the beginning or end of a month.
rm(list=c("dnamx", "NH4"))
dnam <- dnam %>% dplyr::select(!c(nrow, dist, lon, lat, value, NH4_date))
Now that we have calculated the patient-specific PM exposures, I can get rid of all rows other than the first row for each patient.
dnam <- dnam %>%
distinct_at(vars(ID), .keep_all=T)
This takes us down to 733 observations.
outfile3 <- here("PM25_Data/PFF_fILD_2000_2017_BC_2021_11_05.xlsx")
BC <- read_excel(outfile3)
dnam <- inner_join(BC, dnam, by="ID")
Reorder so “ID” is the first column
dnam <- dnam %>% dplyr::select(ID, everything(.))
First we need to convert the PM dataframe into the long rather than the wide format, which will allow us to use it more easily in R’s tidyverse as this is “tidy” formatting.
dnam <- dnam %>%
pivot_longer(cols=c(6:221), names_to="BC_date", names_prefix="BC_", names_repair = "minimal")
dnamx <- dnam
dnamx$BC_date <- gsub("jan", "01-01-20", dnamx$BC_date)
dnamx$BC_date <- gsub("feb", "01-02-20", dnamx$BC_date)
dnamx$BC_date <- gsub("mar", "01-03-20", dnamx$BC_date)
dnamx$BC_date <- gsub("apr", "01-04-20", dnamx$BC_date)
dnamx$BC_date <- gsub("may", "01-05-20", dnamx$BC_date)
dnamx$BC_date <- gsub("jun", "01-06-20", dnamx$BC_date)
dnamx$BC_date <- gsub("jul", "01-07-20", dnamx$BC_date)
dnamx$BC_date <- gsub("aug", "01-08-20", dnamx$BC_date)
dnamx$BC_date <- gsub("sep", "01-09-20", dnamx$BC_date)
dnamx$BC_date <- gsub("oct", "01-10-20", dnamx$BC_date)
dnamx$BC_date <- gsub("nov", "01-11-20", dnamx$BC_date)
dnamx$BC_date <- gsub("dec", "01-12-20", dnamx$BC_date)
dnamx$BC_date <- format(as.Date(dnamx$BC_date, format="%d-%m-%Y"),"%Y-%m-%d")
dnamx$BC_date <- as.Date(dnamx$BC_date)
dnam <- dnamx
Next I need to convert all date columns to proper format
dnam <- dnam %>%
mutate_at(c("BC_date"), as.Date)
str(dnam)
## tibble [161,136 × 78] (S3: tbl_df/tbl/data.frame)
## $ ID : num [1:161136] 513 513 513 513 513 513 513 513 513 513 ...
## $ nrow : num [1:161136] 27492333 27492333 27492333 27492333 27492333 ...
## $ dist : num [1:161136] 0.00707 0.00707 0.00707 0.00707 0.00707 ...
## $ lon : num [1:161136] -123 -123 -123 -123 -123 ...
## $ lat : num [1:161136] 38.4 38.4 38.4 38.4 38.4 ...
## $ SSID : chr [1:161136] "02R0456" "02R0456" "02R0456" "02R0456" ...
## $ sampID : num [1:161136] 77900524 77900524 77900524 77900524 77900524 ...
## $ exptID : num [1:161136] 682 682 682 682 682 682 682 682 682 682 ...
## $ run_date : Date[1:161136], format: "2022-04-25" "2022-04-25" ...
## $ plate : num [1:161136] 17 17 17 17 17 17 17 17 17 17 ...
## $ pct_5mC : num [1:161136] 0.105 0.105 0.105 0.105 0.105 ...
## $ sex : chr [1:161136] "Male" "Male" "Male" "Male" ...
## $ race : chr [1:161136] "W" "W" "W" "W" ...
## $ dich_Race : chr [1:161136] "White" "White" "White" "White" ...
## $ ethnicity : chr [1:161136] "N" "N" "N" "N" ...
## $ smokeHx : chr [1:161136] "Ever" "Ever" "Ever" "Ever" ...
## $ age_dx : num [1:161136] 74 74 74 74 74 ...
## $ status : chr [1:161136] "1" "1" "1" "1" ...
## $ deadORtx : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
## $ dx : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
## $ dx_group : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
## $ dx_date : Date[1:161136], format: "2016-05-20" "2016-05-20" ...
## $ consent_date : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
## $ censor_date : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ tx_date : Date[1:161136], format: NA NA ...
## $ death_date : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ deathORtx_date : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ DeathTxCensor_date: Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ sample_date : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
## $ fvc_date : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
## $ dlco_date : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
## $ fvc_pct : num [1:161136] 58.5 58.5 58.5 58.5 58.5 ...
## $ dlco_pct : num [1:161136] 28.8 28.8 28.8 28.8 28.8 ...
## $ fvc_timefromdx : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
## $ dlco_timefromdx : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
## $ Reason_Termination: chr [1:161136] "Death" "Death" "Death" "Death" ...
## $ Death_ILD_Related : chr [1:161136] "Yes" "Yes" "Yes" "Yes" ...
## $ same_zip : logi [1:161136] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ zip : chr [1:161136] "95405" "95405" "95405" "95405" ...
## $ zip_new : chr [1:161136] "95405" "95405" "95405" "95405" ...
## $ state : chr [1:161136] "CA" "CA" "CA" "CA" ...
## $ major_city : chr [1:161136] "Santa Rosa" "Santa Rosa" "Santa Rosa" "Santa Rosa" ...
## $ time_censoring : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ time_death : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ time_tx : num [1:161136] NA NA NA NA NA NA NA NA NA NA ...
## $ time_deathORtx : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ time_DeathTxCensor: num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ PM_5yrPreCensor : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
## $ PM_5yrPreDx : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
## $ PM5yrCensor_dich : chr [1:161136] "Low" "Low" "Low" "Low" ...
## $ PM5yr_dich : chr [1:161136] "Low" "Low" "Low" "Low" ...
## $ dx_IPF : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
## $ ruca : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
## $ metro : chr [1:161136] "metropolitan" "metropolitan" "metropolitan" "metropolitan" ...
## $ site : chr [1:161136] "02R" "02R" "02R" "02R" ...
## $ PM_date : Date[1:161136], format: "2000-01-01" "2000-01-01" ...
## $ PM_5yrPreSamp : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
## $ PM_1yrPreSamp : num [1:161136] 6.31 6.31 6.31 6.31 6.31 ...
## $ PM_6moPreSamp : num [1:161136] 6.6 6.6 6.6 6.6 6.6 ...
## $ PM_3moPreSamp : num [1:161136] 5.63 5.63 5.63 5.63 5.63 ...
## $ PM_1moPreSamp : num [1:161136] 6.35 6.35 6.35 6.35 6.35 ...
## $ SO4_5yrPreSamp : num [1:161136] 0.673 0.673 0.673 0.673 0.673 ...
## $ SO4_1yrPreSamp : num [1:161136] 0.475 0.475 0.475 0.475 0.475 ...
## $ SO4_6moPreSamp : num [1:161136] 0.417 0.417 0.417 0.417 0.417 ...
## $ SO4_3moPreSamp : num [1:161136] 0.567 0.567 0.567 0.567 0.567 ...
## $ SO4_1moPreSamp : num [1:161136] 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 ...
## $ NO3_5yrPreSamp : num [1:161136] 0.923 0.923 0.923 0.923 0.923 ...
## $ NO3_1yrPreSamp : num [1:161136] 0.8 0.8 0.8 0.8 0.8 ...
## $ NO3_6moPreSamp : num [1:161136] 0.883 0.883 0.883 0.883 0.883 ...
## $ NO3_3moPreSamp : num [1:161136] 0.533 0.533 0.533 0.533 0.533 ...
## $ NO3_1moPreSamp : num [1:161136] 0.6 0.6 0.6 0.6 0.6 ...
## $ NH4_5yrPreSamp : num [1:161136] 0.143 0.143 0.143 0.143 0.143 ...
## $ NH4_1yrPreSamp : num [1:161136] 0.0583 0.0583 0.0583 0.0583 0.0583 ...
## $ NH4_6moPreSamp : num [1:161136] 0.05 0.05 0.05 0.05 0.05 ...
## $ NH4_3moPreSamp : num [1:161136] 0.0333 0.0333 0.0333 0.0333 0.0333 ...
## $ NH4_1moPreSamp : num [1:161136] 0.05 0.05 0.05 0.05 0.05 ...
## $ BC_date : Date[1:161136], format: "2000-01-01" "2000-02-01" ...
## $ value : num [1:161136] 1.4 1 0.7 1.2 0.9 ...
Now I’m creating new variables where I am matching up PM averages per year to years of major events for patients (year of diagnosis, year of death/lung transplant/censoring, etc)
Here I am calculating the average BC value in the 5yrs prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(BC_5yrPreSamp = mean(value[ymd(BC_date)>=(ymd(sample_date) - years(5)) &
ymd(BC_date)<=ymd(sample_date)]))
Here I am calculating the average BC value in the 1yr prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(BC_1yrPreSamp = mean(value[ymd(BC_date)>=(ymd(sample_date) - years(1)) &
ymd(BC_date)<=ymd(sample_date)]))
Here I am calculating the average BC value in the 6mo prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(BC_6moPreSamp = mean(value[ymd(BC_date)>=(ymd(sample_date) - months(6)) &
ymd(BC_date)<=ymd(sample_date)]))
Here I am calculating the average BC value in the 3mo prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(BC_3moPreSamp = mean(value[ymd(BC_date)>=(ymd(sample_date) - months(3)) &
ymd(BC_date)<=ymd(sample_date)]))
Here I am calculating the average BC value in the 1mo prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(BC_1moPreSamp = mean(value[ymd(BC_date)>=(ymd(sample_date) - months(2)) &
ymd(BC_date)<=ymd(sample_date)]))
Of note, the 1mo pre-sampling with this method actually provides the average PM2.5 value for the month of sampling and the 1mo prior to sampling because the sampling could have occurred at the beginning or end of a month.
rm(list=c("dnamx", "BC"))
dnam <- dnam %>% dplyr::select(!c(nrow, dist, lon, lat, value, BC_date))
Now that we have calculated the patient-specific PM exposures, I can get rid of all rows other than the first row for each patient.
dnam <- dnam %>%
distinct_at(vars(ID), .keep_all=T)
This takes us down to 733 observations.
outfile3 <- here("PM25_Data/PFF_fILD_2000_2017_OM_2021_11_05.xlsx")
OM <- read_excel(outfile3)
dnam <- inner_join(OM, dnam, by="ID")
Reorder so “ID” is the first column
dnam <- dnam %>% dplyr::select(ID, everything(.))
First we need to convert the PM dataframe into the long rather than the wide format, which will allow us to use it more easily in R’s tidyverse as this is “tidy” formatting.
dnam <- dnam %>%
pivot_longer(cols=c(6:221), names_to="OM_date", names_prefix="OM_", names_repair = "minimal")
dnamx <- dnam
dnamx$OM_date <- gsub("jan", "01-01-20", dnamx$OM_date)
dnamx$OM_date <- gsub("feb", "01-02-20", dnamx$OM_date)
dnamx$OM_date <- gsub("mar", "01-03-20", dnamx$OM_date)
dnamx$OM_date <- gsub("apr", "01-04-20", dnamx$OM_date)
dnamx$OM_date <- gsub("may", "01-05-20", dnamx$OM_date)
dnamx$OM_date <- gsub("jun", "01-06-20", dnamx$OM_date)
dnamx$OM_date <- gsub("jul", "01-07-20", dnamx$OM_date)
dnamx$OM_date <- gsub("aug", "01-08-20", dnamx$OM_date)
dnamx$OM_date <- gsub("sep", "01-09-20", dnamx$OM_date)
dnamx$OM_date <- gsub("oct", "01-10-20", dnamx$OM_date)
dnamx$OM_date <- gsub("nov", "01-11-20", dnamx$OM_date)
dnamx$OM_date <- gsub("dec", "01-12-20", dnamx$OM_date)
dnamx$OM_date <- format(as.Date(dnamx$OM_date, format="%d-%m-%Y"),"%Y-%m-%d")
dnamx$OM_date <- as.Date(dnamx$OM_date)
dnam <- dnamx
Next I need to convert all date columns to proper format
dnam <- dnam %>%
mutate_at(c("OM_date"), as.Date)
str(dnam)
## tibble [161,136 × 83] (S3: tbl_df/tbl/data.frame)
## $ ID : num [1:161136] 513 513 513 513 513 513 513 513 513 513 ...
## $ nrow : num [1:161136] 27492333 27492333 27492333 27492333 27492333 ...
## $ dist : num [1:161136] 0.00707 0.00707 0.00707 0.00707 0.00707 ...
## $ lon : num [1:161136] -123 -123 -123 -123 -123 ...
## $ lat : num [1:161136] 38.4 38.4 38.4 38.4 38.4 ...
## $ SSID : chr [1:161136] "02R0456" "02R0456" "02R0456" "02R0456" ...
## $ sampID : num [1:161136] 77900524 77900524 77900524 77900524 77900524 ...
## $ exptID : num [1:161136] 682 682 682 682 682 682 682 682 682 682 ...
## $ run_date : Date[1:161136], format: "2022-04-25" "2022-04-25" ...
## $ plate : num [1:161136] 17 17 17 17 17 17 17 17 17 17 ...
## $ pct_5mC : num [1:161136] 0.105 0.105 0.105 0.105 0.105 ...
## $ sex : chr [1:161136] "Male" "Male" "Male" "Male" ...
## $ race : chr [1:161136] "W" "W" "W" "W" ...
## $ dich_Race : chr [1:161136] "White" "White" "White" "White" ...
## $ ethnicity : chr [1:161136] "N" "N" "N" "N" ...
## $ smokeHx : chr [1:161136] "Ever" "Ever" "Ever" "Ever" ...
## $ age_dx : num [1:161136] 74 74 74 74 74 ...
## $ status : chr [1:161136] "1" "1" "1" "1" ...
## $ deadORtx : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
## $ dx : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
## $ dx_group : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
## $ dx_date : Date[1:161136], format: "2016-05-20" "2016-05-20" ...
## $ consent_date : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
## $ censor_date : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ tx_date : Date[1:161136], format: NA NA ...
## $ death_date : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ deathORtx_date : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ DeathTxCensor_date: Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ sample_date : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
## $ fvc_date : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
## $ dlco_date : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
## $ fvc_pct : num [1:161136] 58.5 58.5 58.5 58.5 58.5 ...
## $ dlco_pct : num [1:161136] 28.8 28.8 28.8 28.8 28.8 ...
## $ fvc_timefromdx : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
## $ dlco_timefromdx : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
## $ Reason_Termination: chr [1:161136] "Death" "Death" "Death" "Death" ...
## $ Death_ILD_Related : chr [1:161136] "Yes" "Yes" "Yes" "Yes" ...
## $ same_zip : logi [1:161136] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ zip : chr [1:161136] "95405" "95405" "95405" "95405" ...
## $ zip_new : chr [1:161136] "95405" "95405" "95405" "95405" ...
## $ state : chr [1:161136] "CA" "CA" "CA" "CA" ...
## $ major_city : chr [1:161136] "Santa Rosa" "Santa Rosa" "Santa Rosa" "Santa Rosa" ...
## $ time_censoring : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ time_death : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ time_tx : num [1:161136] NA NA NA NA NA NA NA NA NA NA ...
## $ time_deathORtx : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ time_DeathTxCensor: num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ PM_5yrPreCensor : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
## $ PM_5yrPreDx : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
## $ PM5yrCensor_dich : chr [1:161136] "Low" "Low" "Low" "Low" ...
## $ PM5yr_dich : chr [1:161136] "Low" "Low" "Low" "Low" ...
## $ dx_IPF : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
## $ ruca : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
## $ metro : chr [1:161136] "metropolitan" "metropolitan" "metropolitan" "metropolitan" ...
## $ site : chr [1:161136] "02R" "02R" "02R" "02R" ...
## $ PM_date : Date[1:161136], format: "2000-01-01" "2000-01-01" ...
## $ PM_5yrPreSamp : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
## $ PM_1yrPreSamp : num [1:161136] 6.31 6.31 6.31 6.31 6.31 ...
## $ PM_6moPreSamp : num [1:161136] 6.6 6.6 6.6 6.6 6.6 ...
## $ PM_3moPreSamp : num [1:161136] 5.63 5.63 5.63 5.63 5.63 ...
## $ PM_1moPreSamp : num [1:161136] 6.35 6.35 6.35 6.35 6.35 ...
## $ SO4_5yrPreSamp : num [1:161136] 0.673 0.673 0.673 0.673 0.673 ...
## $ SO4_1yrPreSamp : num [1:161136] 0.475 0.475 0.475 0.475 0.475 ...
## $ SO4_6moPreSamp : num [1:161136] 0.417 0.417 0.417 0.417 0.417 ...
## $ SO4_3moPreSamp : num [1:161136] 0.567 0.567 0.567 0.567 0.567 ...
## $ SO4_1moPreSamp : num [1:161136] 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 ...
## $ NO3_5yrPreSamp : num [1:161136] 0.923 0.923 0.923 0.923 0.923 ...
## $ NO3_1yrPreSamp : num [1:161136] 0.8 0.8 0.8 0.8 0.8 ...
## $ NO3_6moPreSamp : num [1:161136] 0.883 0.883 0.883 0.883 0.883 ...
## $ NO3_3moPreSamp : num [1:161136] 0.533 0.533 0.533 0.533 0.533 ...
## $ NO3_1moPreSamp : num [1:161136] 0.6 0.6 0.6 0.6 0.6 ...
## $ NH4_5yrPreSamp : num [1:161136] 0.143 0.143 0.143 0.143 0.143 ...
## $ NH4_1yrPreSamp : num [1:161136] 0.0583 0.0583 0.0583 0.0583 0.0583 ...
## $ NH4_6moPreSamp : num [1:161136] 0.05 0.05 0.05 0.05 0.05 ...
## $ NH4_3moPreSamp : num [1:161136] 0.0333 0.0333 0.0333 0.0333 0.0333 ...
## $ NH4_1moPreSamp : num [1:161136] 0.05 0.05 0.05 0.05 0.05 ...
## $ BC_5yrPreSamp : num [1:161136] 0.738 0.738 0.738 0.738 0.738 ...
## $ BC_1yrPreSamp : num [1:161136] 0.575 0.575 0.575 0.575 0.575 ...
## $ BC_6moPreSamp : num [1:161136] 0.667 0.667 0.667 0.667 0.667 ...
## $ BC_3moPreSamp : num [1:161136] 0.367 0.367 0.367 0.367 0.367 ...
## $ BC_1moPreSamp : num [1:161136] 0.35 0.35 0.35 0.35 0.35 ...
## $ OM_date : Date[1:161136], format: "2000-01-01" "2000-02-01" ...
## $ value : num [1:161136] 10.9 3.2 11.3 9.7 5.5 ...
Now I’m creating new variables where I am matching up PM averages per year to years of major events for patients (year of diagnosis, year of death/lung transplant/censoring, etc)
Here I am calculating the average OM value in the 5yrs prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(OM_5yrPreSamp = mean(value[ymd(OM_date)>=(ymd(sample_date) - years(5)) &
ymd(OM_date)<=ymd(sample_date)]))
Here I am calculating the average OM value in the 1yr prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(OM_1yrPreSamp = mean(value[ymd(OM_date)>=(ymd(sample_date) - years(1)) &
ymd(OM_date)<=ymd(sample_date)]))
Here I am calculating the average OM value in the 6mo prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(OM_6moPreSamp = mean(value[ymd(OM_date)>=(ymd(sample_date) - months(6)) &
ymd(OM_date)<=ymd(sample_date)]))
Here I am calculating the average OM value in the 3mo prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(OM_3moPreSamp = mean(value[ymd(OM_date)>=(ymd(sample_date) - months(3)) &
ymd(OM_date)<=ymd(sample_date)]))
Here I am calculating the average OM value in the 1mo prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(OM_1moPreSamp = mean(value[ymd(OM_date)>=(ymd(sample_date) - months(2)) &
ymd(OM_date)<=ymd(sample_date)]))
Of note, the 1mo pre-sampling with this method actually provides the average PM2.5 value for the month of sampling and the 1mo prior to sampling because the sampling could have occurred at the beginning or end of a month.
rm(list=c("dnamx", "OM"))
dnam <- dnam %>% dplyr::select(!c(nrow, dist, lon, lat, value, OM_date))
Now that we have calculated the patient-specific PM exposures, I can get rid of all rows other than the first row for each patient.
dnam <- dnam %>%
distinct_at(vars(ID), .keep_all=T)
This takes us down to 733 observations.
outfile3 <- here("PM25_Data/PFF_fILD_2000_2017_SS_2021_11_05.xlsx")
SS <- read_excel(outfile3)
dnam <- inner_join(SS, dnam, by="ID")
Reorder so “ID” is the first column
dnam <- dnam %>% dplyr::select(ID, everything(.))
First we need to convert the PM dataframe into the long rather than the wide format, which will allow us to use it more easily in R’s tidyverse as this is “tidy” formatting.
dnam <- dnam %>%
pivot_longer(cols=c(6:221), names_to="SS_date", names_prefix="SS_", names_repair = "minimal")
dnamx <- dnam
dnamx$SS_date <- gsub("jan", "01-01-20", dnamx$SS_date)
dnamx$SS_date <- gsub("feb", "01-02-20", dnamx$SS_date)
dnamx$SS_date <- gsub("mar", "01-03-20", dnamx$SS_date)
dnamx$SS_date <- gsub("apr", "01-04-20", dnamx$SS_date)
dnamx$SS_date <- gsub("may", "01-05-20", dnamx$SS_date)
dnamx$SS_date <- gsub("jun", "01-06-20", dnamx$SS_date)
dnamx$SS_date <- gsub("jul", "01-07-20", dnamx$SS_date)
dnamx$SS_date <- gsub("aug", "01-08-20", dnamx$SS_date)
dnamx$SS_date <- gsub("sep", "01-09-20", dnamx$SS_date)
dnamx$SS_date <- gsub("oct", "01-10-20", dnamx$SS_date)
dnamx$SS_date <- gsub("nov", "01-11-20", dnamx$SS_date)
dnamx$SS_date <- gsub("dec", "01-12-20", dnamx$SS_date)
dnamx$SS_date <- format(as.Date(dnamx$SS_date, format="%d-%m-%Y"),"%Y-%m-%d")
dnamx$SS_date <- as.Date(dnamx$SS_date)
dnam <- dnamx
Next I need to convert all date columns to proper format
dnam <- dnam %>%
mutate_at(c("SS_date"), as.Date)
str(dnam)
## tibble [161,136 × 88] (S3: tbl_df/tbl/data.frame)
## $ ID : num [1:161136] 513 513 513 513 513 513 513 513 513 513 ...
## $ nrow : num [1:161136] 27492333 27492333 27492333 27492333 27492333 ...
## $ dist : num [1:161136] 0.00707 0.00707 0.00707 0.00707 0.00707 ...
## $ lon : num [1:161136] -123 -123 -123 -123 -123 ...
## $ lat : num [1:161136] 38.4 38.4 38.4 38.4 38.4 ...
## $ SSID : chr [1:161136] "02R0456" "02R0456" "02R0456" "02R0456" ...
## $ sampID : num [1:161136] 77900524 77900524 77900524 77900524 77900524 ...
## $ exptID : num [1:161136] 682 682 682 682 682 682 682 682 682 682 ...
## $ run_date : Date[1:161136], format: "2022-04-25" "2022-04-25" ...
## $ plate : num [1:161136] 17 17 17 17 17 17 17 17 17 17 ...
## $ pct_5mC : num [1:161136] 0.105 0.105 0.105 0.105 0.105 ...
## $ sex : chr [1:161136] "Male" "Male" "Male" "Male" ...
## $ race : chr [1:161136] "W" "W" "W" "W" ...
## $ dich_Race : chr [1:161136] "White" "White" "White" "White" ...
## $ ethnicity : chr [1:161136] "N" "N" "N" "N" ...
## $ smokeHx : chr [1:161136] "Ever" "Ever" "Ever" "Ever" ...
## $ age_dx : num [1:161136] 74 74 74 74 74 ...
## $ status : chr [1:161136] "1" "1" "1" "1" ...
## $ deadORtx : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
## $ dx : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
## $ dx_group : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
## $ dx_date : Date[1:161136], format: "2016-05-20" "2016-05-20" ...
## $ consent_date : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
## $ censor_date : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ tx_date : Date[1:161136], format: NA NA ...
## $ death_date : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ deathORtx_date : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ DeathTxCensor_date: Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ sample_date : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
## $ fvc_date : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
## $ dlco_date : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
## $ fvc_pct : num [1:161136] 58.5 58.5 58.5 58.5 58.5 ...
## $ dlco_pct : num [1:161136] 28.8 28.8 28.8 28.8 28.8 ...
## $ fvc_timefromdx : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
## $ dlco_timefromdx : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
## $ Reason_Termination: chr [1:161136] "Death" "Death" "Death" "Death" ...
## $ Death_ILD_Related : chr [1:161136] "Yes" "Yes" "Yes" "Yes" ...
## $ same_zip : logi [1:161136] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ zip : chr [1:161136] "95405" "95405" "95405" "95405" ...
## $ zip_new : chr [1:161136] "95405" "95405" "95405" "95405" ...
## $ state : chr [1:161136] "CA" "CA" "CA" "CA" ...
## $ major_city : chr [1:161136] "Santa Rosa" "Santa Rosa" "Santa Rosa" "Santa Rosa" ...
## $ time_censoring : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ time_death : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ time_tx : num [1:161136] NA NA NA NA NA NA NA NA NA NA ...
## $ time_deathORtx : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ time_DeathTxCensor: num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ PM_5yrPreCensor : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
## $ PM_5yrPreDx : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
## $ PM5yrCensor_dich : chr [1:161136] "Low" "Low" "Low" "Low" ...
## $ PM5yr_dich : chr [1:161136] "Low" "Low" "Low" "Low" ...
## $ dx_IPF : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
## $ ruca : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
## $ metro : chr [1:161136] "metropolitan" "metropolitan" "metropolitan" "metropolitan" ...
## $ site : chr [1:161136] "02R" "02R" "02R" "02R" ...
## $ PM_date : Date[1:161136], format: "2000-01-01" "2000-01-01" ...
## $ PM_5yrPreSamp : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
## $ PM_1yrPreSamp : num [1:161136] 6.31 6.31 6.31 6.31 6.31 ...
## $ PM_6moPreSamp : num [1:161136] 6.6 6.6 6.6 6.6 6.6 ...
## $ PM_3moPreSamp : num [1:161136] 5.63 5.63 5.63 5.63 5.63 ...
## $ PM_1moPreSamp : num [1:161136] 6.35 6.35 6.35 6.35 6.35 ...
## $ SO4_5yrPreSamp : num [1:161136] 0.673 0.673 0.673 0.673 0.673 ...
## $ SO4_1yrPreSamp : num [1:161136] 0.475 0.475 0.475 0.475 0.475 ...
## $ SO4_6moPreSamp : num [1:161136] 0.417 0.417 0.417 0.417 0.417 ...
## $ SO4_3moPreSamp : num [1:161136] 0.567 0.567 0.567 0.567 0.567 ...
## $ SO4_1moPreSamp : num [1:161136] 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 ...
## $ NO3_5yrPreSamp : num [1:161136] 0.923 0.923 0.923 0.923 0.923 ...
## $ NO3_1yrPreSamp : num [1:161136] 0.8 0.8 0.8 0.8 0.8 ...
## $ NO3_6moPreSamp : num [1:161136] 0.883 0.883 0.883 0.883 0.883 ...
## $ NO3_3moPreSamp : num [1:161136] 0.533 0.533 0.533 0.533 0.533 ...
## $ NO3_1moPreSamp : num [1:161136] 0.6 0.6 0.6 0.6 0.6 ...
## $ NH4_5yrPreSamp : num [1:161136] 0.143 0.143 0.143 0.143 0.143 ...
## $ NH4_1yrPreSamp : num [1:161136] 0.0583 0.0583 0.0583 0.0583 0.0583 ...
## $ NH4_6moPreSamp : num [1:161136] 0.05 0.05 0.05 0.05 0.05 ...
## $ NH4_3moPreSamp : num [1:161136] 0.0333 0.0333 0.0333 0.0333 0.0333 ...
## $ NH4_1moPreSamp : num [1:161136] 0.05 0.05 0.05 0.05 0.05 ...
## $ BC_5yrPreSamp : num [1:161136] 0.738 0.738 0.738 0.738 0.738 ...
## $ BC_1yrPreSamp : num [1:161136] 0.575 0.575 0.575 0.575 0.575 ...
## $ BC_6moPreSamp : num [1:161136] 0.667 0.667 0.667 0.667 0.667 ...
## $ BC_3moPreSamp : num [1:161136] 0.367 0.367 0.367 0.367 0.367 ...
## $ BC_1moPreSamp : num [1:161136] 0.35 0.35 0.35 0.35 0.35 ...
## $ OM_5yrPreSamp : num [1:161136] 3.95 3.95 3.95 3.95 3.95 ...
## $ OM_1yrPreSamp : num [1:161136] 3.08 3.08 3.08 3.08 3.08 ...
## $ OM_6moPreSamp : num [1:161136] 3.52 3.52 3.52 3.52 3.52 ...
## $ OM_3moPreSamp : num [1:161136] 2.1 2.1 2.1 2.1 2.1 ...
## $ OM_1moPreSamp : num [1:161136] 2.1 2.1 2.1 2.1 2.1 ...
## $ SS_date : Date[1:161136], format: "2000-01-01" "2000-02-01" ...
## $ value : num [1:161136] 1.3 1.4 7.4 0.9 1.5 ...
Now I’m creating new variables where I am matching up PM averages per year to years of major events for patients (year of diagnosis, year of death/lung transplant/censoring, etc)
Here I am calculating the average SS value in the 5yrs prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(SS_5yrPreSamp = mean(value[ymd(SS_date)>=(ymd(sample_date) - years(5)) &
ymd(SS_date)<=ymd(sample_date)]))
Here I am calculating the average SS value in the 1yr prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(SS_1yrPreSamp = mean(value[ymd(SS_date)>=(ymd(sample_date) - years(1)) &
ymd(SS_date)<=ymd(sample_date)]))
Here I am calculating the average SS value in the 6mo prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(SS_6moPreSamp = mean(value[ymd(SS_date)>=(ymd(sample_date) - months(6)) &
ymd(SS_date)<=ymd(sample_date)]))
Here I am calculating the average SS value in the 3mo prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(SS_3moPreSamp = mean(value[ymd(SS_date)>=(ymd(sample_date) - months(3)) &
ymd(SS_date)<=ymd(sample_date)]))
Here I am calculating the average SS value in the 1mo prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(SS_1moPreSamp = mean(value[ymd(SS_date)>=(ymd(sample_date) - months(2)) &
ymd(SS_date)<=ymd(sample_date)]))
Of note, the 1mo pre-sampling with this method actually provides the average PM2.5 value for the month of sampling and the 1mo prior to sampling because the sampling could have occurred at the beginning or end of a month.
rm(list=c("dnamx", "SS"))
dnam <- dnam %>% dplyr::select(!c(nrow, dist, lon, lat, value, SS_date))
Now that we have calculated the patient-specific PM exposures, I can get rid of all rows other than the first row for each patient.
dnam <- dnam %>%
distinct_at(vars(ID), .keep_all=T)
This takes us down to 733 observations.
outfile3 <- here("PM25_Data/PFF_fILD_2000_2017_soil_2021_11_05.xlsx")
Soil <- read_excel(outfile3)
dnam <- inner_join(Soil, dnam, by="ID")
Reorder so “ID” is the first column
dnam <- dnam %>% dplyr::select(ID, everything(.))
First we need to convert the PM dataframe into the long rather than the wide format, which will allow us to use it more easily in R’s tidyverse as this is “tidy” formatting.
dnam <- dnam %>%
pivot_longer(cols=c(6:221), names_to="Soil_date", names_prefix="soil_", names_repair = "minimal")
dnamx <- dnam
dnamx$Soil_date <- gsub("jan", "01-01-20", dnamx$Soil_date)
dnamx$Soil_date <- gsub("feb", "01-02-20", dnamx$Soil_date)
dnamx$Soil_date <- gsub("mar", "01-03-20", dnamx$Soil_date)
dnamx$Soil_date <- gsub("apr", "01-04-20", dnamx$Soil_date)
dnamx$Soil_date <- gsub("may", "01-05-20", dnamx$Soil_date)
dnamx$Soil_date <- gsub("jun", "01-06-20", dnamx$Soil_date)
dnamx$Soil_date <- gsub("jul", "01-07-20", dnamx$Soil_date)
dnamx$Soil_date <- gsub("aug", "01-08-20", dnamx$Soil_date)
dnamx$Soil_date <- gsub("sep", "01-09-20", dnamx$Soil_date)
dnamx$Soil_date <- gsub("oct", "01-10-20", dnamx$Soil_date)
dnamx$Soil_date <- gsub("nov", "01-11-20", dnamx$Soil_date)
dnamx$Soil_date <- gsub("dec", "01-12-20", dnamx$Soil_date)
dnamx$Soil_date <- format(as.Date(dnamx$Soil_date, format="%d-%m-%Y"),"%Y-%m-%d")
dnamx$Soil_date <- as.Date(dnamx$Soil_date)
dnam <- dnamx
Next I need to convert all date columns to proper format
dnam <- dnam %>%
mutate_at(c("Soil_date"), as.Date)
str(dnam)
## tibble [161,136 × 93] (S3: tbl_df/tbl/data.frame)
## $ ID : num [1:161136] 513 513 513 513 513 513 513 513 513 513 ...
## $ nrow : num [1:161136] 27492333 27492333 27492333 27492333 27492333 ...
## $ dist : num [1:161136] 0.00707 0.00707 0.00707 0.00707 0.00707 ...
## $ lon : num [1:161136] -123 -123 -123 -123 -123 ...
## $ lat : num [1:161136] 38.4 38.4 38.4 38.4 38.4 ...
## $ SSID : chr [1:161136] "02R0456" "02R0456" "02R0456" "02R0456" ...
## $ sampID : num [1:161136] 77900524 77900524 77900524 77900524 77900524 ...
## $ exptID : num [1:161136] 682 682 682 682 682 682 682 682 682 682 ...
## $ run_date : Date[1:161136], format: "2022-04-25" "2022-04-25" ...
## $ plate : num [1:161136] 17 17 17 17 17 17 17 17 17 17 ...
## $ pct_5mC : num [1:161136] 0.105 0.105 0.105 0.105 0.105 ...
## $ sex : chr [1:161136] "Male" "Male" "Male" "Male" ...
## $ race : chr [1:161136] "W" "W" "W" "W" ...
## $ dich_Race : chr [1:161136] "White" "White" "White" "White" ...
## $ ethnicity : chr [1:161136] "N" "N" "N" "N" ...
## $ smokeHx : chr [1:161136] "Ever" "Ever" "Ever" "Ever" ...
## $ age_dx : num [1:161136] 74 74 74 74 74 ...
## $ status : chr [1:161136] "1" "1" "1" "1" ...
## $ deadORtx : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
## $ dx : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
## $ dx_group : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
## $ dx_date : Date[1:161136], format: "2016-05-20" "2016-05-20" ...
## $ consent_date : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
## $ censor_date : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ tx_date : Date[1:161136], format: NA NA ...
## $ death_date : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ deathORtx_date : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ DeathTxCensor_date: Date[1:161136], format: "2016-06-11" "2016-06-11" ...
## $ sample_date : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
## $ fvc_date : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
## $ dlco_date : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
## $ fvc_pct : num [1:161136] 58.5 58.5 58.5 58.5 58.5 ...
## $ dlco_pct : num [1:161136] 28.8 28.8 28.8 28.8 28.8 ...
## $ fvc_timefromdx : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
## $ dlco_timefromdx : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
## $ Reason_Termination: chr [1:161136] "Death" "Death" "Death" "Death" ...
## $ Death_ILD_Related : chr [1:161136] "Yes" "Yes" "Yes" "Yes" ...
## $ same_zip : logi [1:161136] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ zip : chr [1:161136] "95405" "95405" "95405" "95405" ...
## $ zip_new : chr [1:161136] "95405" "95405" "95405" "95405" ...
## $ state : chr [1:161136] "CA" "CA" "CA" "CA" ...
## $ major_city : chr [1:161136] "Santa Rosa" "Santa Rosa" "Santa Rosa" "Santa Rosa" ...
## $ time_censoring : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ time_death : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ time_tx : num [1:161136] NA NA NA NA NA NA NA NA NA NA ...
## $ time_deathORtx : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ time_DeathTxCensor: num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
## $ PM_5yrPreCensor : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
## $ PM_5yrPreDx : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
## $ PM5yrCensor_dich : chr [1:161136] "Low" "Low" "Low" "Low" ...
## $ PM5yr_dich : chr [1:161136] "Low" "Low" "Low" "Low" ...
## $ dx_IPF : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
## $ ruca : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
## $ metro : chr [1:161136] "metropolitan" "metropolitan" "metropolitan" "metropolitan" ...
## $ site : chr [1:161136] "02R" "02R" "02R" "02R" ...
## $ PM_date : Date[1:161136], format: "2000-01-01" "2000-01-01" ...
## $ PM_5yrPreSamp : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
## $ PM_1yrPreSamp : num [1:161136] 6.31 6.31 6.31 6.31 6.31 ...
## $ PM_6moPreSamp : num [1:161136] 6.6 6.6 6.6 6.6 6.6 ...
## $ PM_3moPreSamp : num [1:161136] 5.63 5.63 5.63 5.63 5.63 ...
## $ PM_1moPreSamp : num [1:161136] 6.35 6.35 6.35 6.35 6.35 ...
## $ SO4_5yrPreSamp : num [1:161136] 0.673 0.673 0.673 0.673 0.673 ...
## $ SO4_1yrPreSamp : num [1:161136] 0.475 0.475 0.475 0.475 0.475 ...
## $ SO4_6moPreSamp : num [1:161136] 0.417 0.417 0.417 0.417 0.417 ...
## $ SO4_3moPreSamp : num [1:161136] 0.567 0.567 0.567 0.567 0.567 ...
## $ SO4_1moPreSamp : num [1:161136] 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 ...
## $ NO3_5yrPreSamp : num [1:161136] 0.923 0.923 0.923 0.923 0.923 ...
## $ NO3_1yrPreSamp : num [1:161136] 0.8 0.8 0.8 0.8 0.8 ...
## $ NO3_6moPreSamp : num [1:161136] 0.883 0.883 0.883 0.883 0.883 ...
## $ NO3_3moPreSamp : num [1:161136] 0.533 0.533 0.533 0.533 0.533 ...
## $ NO3_1moPreSamp : num [1:161136] 0.6 0.6 0.6 0.6 0.6 ...
## $ NH4_5yrPreSamp : num [1:161136] 0.143 0.143 0.143 0.143 0.143 ...
## $ NH4_1yrPreSamp : num [1:161136] 0.0583 0.0583 0.0583 0.0583 0.0583 ...
## $ NH4_6moPreSamp : num [1:161136] 0.05 0.05 0.05 0.05 0.05 ...
## $ NH4_3moPreSamp : num [1:161136] 0.0333 0.0333 0.0333 0.0333 0.0333 ...
## $ NH4_1moPreSamp : num [1:161136] 0.05 0.05 0.05 0.05 0.05 ...
## $ BC_5yrPreSamp : num [1:161136] 0.738 0.738 0.738 0.738 0.738 ...
## $ BC_1yrPreSamp : num [1:161136] 0.575 0.575 0.575 0.575 0.575 ...
## $ BC_6moPreSamp : num [1:161136] 0.667 0.667 0.667 0.667 0.667 ...
## $ BC_3moPreSamp : num [1:161136] 0.367 0.367 0.367 0.367 0.367 ...
## $ BC_1moPreSamp : num [1:161136] 0.35 0.35 0.35 0.35 0.35 ...
## $ OM_5yrPreSamp : num [1:161136] 3.95 3.95 3.95 3.95 3.95 ...
## $ OM_1yrPreSamp : num [1:161136] 3.08 3.08 3.08 3.08 3.08 ...
## $ OM_6moPreSamp : num [1:161136] 3.52 3.52 3.52 3.52 3.52 ...
## $ OM_3moPreSamp : num [1:161136] 2.1 2.1 2.1 2.1 2.1 ...
## $ OM_1moPreSamp : num [1:161136] 2.1 2.1 2.1 2.1 2.1 ...
## $ SS_5yrPreSamp : num [1:161136] 1.34 1.34 1.34 1.34 1.34 ...
## $ SS_1yrPreSamp : num [1:161136] 1.1 1.1 1.1 1.1 1.1 ...
## $ SS_6moPreSamp : num [1:161136] 1.52 1.52 1.52 1.52 1.52 ...
## $ SS_3moPreSamp : num [1:161136] 2.07 2.07 2.07 2.07 2.07 ...
## $ SS_1moPreSamp : num [1:161136] 2.3 2.3 2.3 2.3 2.3 ...
## $ Soil_date : Date[1:161136], format: "2000-01-01" "2000-02-01" ...
## $ value : num [1:161136] 0.4 0.3 0.3 0.6 0.4 ...
Now I’m creating new variables where I am matching up PM averages per year to years of major events for patients (year of diagnosis, year of death/lung transplant/censoring, etc)
Here I am calculating the average Soil value in the 5yrs prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(Soil_5yrPreSamp = mean(value[ymd(Soil_date)>=(ymd(sample_date) - years(5)) &
ymd(Soil_date)<=ymd(sample_date)]))
Here I am calculating the average Soil value in the 1yr prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(Soil_1yrPreSamp = mean(value[ymd(Soil_date)>=(ymd(sample_date) - years(1)) &
ymd(Soil_date)<=ymd(sample_date)]))
Here I am calculating the average Soil value in the 6mo prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(Soil_6moPreSamp = mean(value[ymd(Soil_date)>=(ymd(sample_date) - months(6)) &
ymd(Soil_date)<=ymd(sample_date)]))
Here I am calculating the average Soil value in the 3mo prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(Soil_3moPreSamp = mean(value[ymd(Soil_date)>=(ymd(sample_date) - months(3)) &
ymd(Soil_date)<=ymd(sample_date)]))
Here I am calculating the average Soil value in the 1mo prior to sampling.
dnam <- dnam %>%
group_by(ID) %>%
mutate(Soil_1moPreSamp = mean(value[ymd(Soil_date)>=(ymd(sample_date) - months(2)) &
ymd(Soil_date)<=ymd(sample_date)]))
Of note, the 1mo pre-sampling with this method actually provides the average PM2.5 value for the month of sampling and the 1mo prior to sampling because the sampling could have occurred at the beginning or end of a month.
rm(list=c("dnamx", "Soil"))
dnam <- dnam %>% dplyr::select(!c(nrow, dist, lon, lat, value, Soil_date))
Now that we have calculated the patient-specific PM exposures, I can get rid of all rows other than the first row for each patient.
dnam <- dnam %>%
distinct_at(vars(ID), .keep_all=T)
This takes us down to 733 observations.
dnam <- dnam %>% select(ID, everything(.))
write_xlsx(dnam, path="PFF_fILD_PM25andConstituentsMatched_forMethylFlash_2022_04_28.xlsx")
Here I am releveling factors so that they are in an intuitive order for my later analyses.
dnam$sex <- fct_relevel(dnam$sex, c("Male","Female"))
dnam$race <- fct_relevel(dnam$race, c("W","B","A","U"))
dnam$ethnicity <- fct_relevel(dnam$ethnicity, c("N","H","U"))
dnam$dich_Race <- fct_relevel(dnam$dich_Race, c("White","Non-White"))
dnam$smokeHx <- fct_relevel(dnam$smokeHx, c("Never","Ever"))
#For dx and dx_group, I just want IPF to be first and then the rest of the categories are alphabetical
dnam$dx <- fct_relevel(dnam$dx, c("IPF"))
dnam$dx_group <- fct_relevel(dnam$dx_group, c("IPF"))
dnam$dx_IPF <- fct_relevel(dnam$dx_IPF, c("IPF"))
#dnam$PM5yrCensor_dich <- fct_relevel(dnam$PM5yrCensor_dich, c("Low"))
#dnam$PM5yr_dich <- fct_relevel(dnam$PM5yr_dich, c("Low"))
#dnam$SO45yrCensor_dich <- fct_relevel(dnam$SO45yrCensor_dich, c("Low"))
#dnam$SO45yr_dich <- fct_relevel(dnam$SO45yr_dich, c("Low"))
#dnam$NO35yrCensor_dich <- fct_relevel(dnam$NO35yrCensor_dich, c("Low"))
#dnam$NO35yr_dich <- fct_relevel(dnam$NO35yr_dich, c("Low"))
#dnam$NH45yrCensor_dich <- fct_relevel(dnam$NH45yrCensor_dich, c("Low"))
#dnam$NH45yr_dich <- fct_relevel(dnam$NH45yr_dich, c("Low"))
#dnam$BC5yrCensor_dich <- fct_relevel(dnam$BC5yrCensor_dich, c("Low"))
#dnam$BC5yr_dich <- fct_relevel(dnam$BC5yr_dich, c("Low"))
#dnam$OM5yrCensor_dich <- fct_relevel(dnam$OM5yrCensor_dich, c("Low"))
#dnam$OM5yr_dich <- fct_relevel(dnam$OM5yr_dich, c("Low"))
#dnam$SS5yrCensor_dich <- fct_relevel(dnam$SS5yrCensor_dich, c("Low"))
#dnam$SS5yr_dich <- fct_relevel(dnam$SS5yr_dich, c("Low"))
#dnam$Soil5yrCensor_dich <- fct_relevel(dnam$Soil5yrCensor_dich, c("Low"))
#dnam$Soil5yr_dich <- fct_relevel(dnam$Soil5yr_dich, c("Low"))
str(dnam)
## grouped_df [746 × 92] (S3: grouped_df/tbl_df/tbl/data.frame)
## $ ID : num [1:746] 513 514 515 516 517 519 520 521 523 524 ...
## $ SSID : chr [1:746] "02R0456" "02R0462" "02R0468" "02R0471" ...
## $ sampID : num [1:746] 77900524 77902774 79049726 77903624 79002826 ...
## $ exptID : num [1:746] 682 324 495 274 267 245 748 15 714 246 ...
## $ run_date : Date[1:746], format: "2022-04-25" "2022-04-12" ...
## $ plate : num [1:746] 17 8 13 7 7 6 19 1 18 6 ...
## $ pct_5mC : num [1:746] 0.1049 0.1397 0.1322 0.0622 0.0492 ...
## $ sex : Factor w/ 2 levels "Male","Female": 1 1 1 2 1 1 1 1 1 1 ...
## $ race : Factor w/ 4 levels "W","B","A","U": 1 1 1 4 1 1 1 1 1 1 ...
## $ dich_Race : Factor w/ 2 levels "White","Non-White": 1 1 1 2 1 1 1 1 1 1 ...
## $ ethnicity : Factor w/ 3 levels "N","H","U": 1 1 1 1 1 1 3 1 1 1 ...
## $ smokeHx : Factor w/ 2 levels "Never","Ever": 2 2 1 2 2 2 1 1 1 2 ...
## $ age_dx : num [1:746] 74 74.2 83.6 74.5 72.3 ...
## $ status : chr [1:746] "1" "0" "0" "1" ...
## $ deadORtx : num [1:746] 1 0 0 1 0 0 0 0 0 0 ...
## $ dx : Factor w/ 1 level "IPF": 1 1 1 1 1 1 1 1 1 1 ...
## $ dx_group : Factor w/ 1 level "IPF": 1 1 1 1 1 1 1 1 1 1 ...
## $ dx_date : Date[1:746], format: "2016-05-20" "2016-07-08" ...
## $ consent_date : Date[1:746], format: "2016-05-19" "2016-07-07" ...
## $ censor_date : Date[1:746], format: "2016-06-11" "2016-07-11" ...
## $ tx_date : Date[1:746], format: NA NA ...
## $ death_date : Date[1:746], format: "2016-06-11" NA ...
## $ deathORtx_date : Date[1:746], format: "2016-06-11" NA ...
## $ DeathTxCensor_date: Date[1:746], format: "2016-06-11" "2016-07-11" ...
## $ sample_date : Date[1:746], format: "2016-05-19" "2016-07-07" ...
## $ fvc_date : Date[1:746], format: "2016-03-17" "2016-04-14" ...
## $ dlco_date : Date[1:746], format: "2016-03-17" "2016-04-14" ...
## $ fvc_pct : num [1:746] 58.5 54.4 NA 81.4 97.2 ...
## $ dlco_pct : num [1:746] 28.8 32.7 NA 46.5 41.1 ...
## $ fvc_timefromdx : num [1:746] -0.175 -0.233 NA 0.832 -0.162 ...
## $ dlco_timefromdx : num [1:746] -0.175 -0.233 NA 0.832 -0.162 ...
## $ Reason_Termination: chr [1:746] "Death" "Lost to Follow-up" NA "Death" ...
## $ Death_ILD_Related : chr [1:746] "Yes" NA NA "Yes" ...
## $ same_zip : logi [1:746] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ zip : chr [1:746] "95405" "95667" "94578" "94945" ...
## $ zip_new : chr [1:746] "95405" "95667" "94578" "94945" ...
## $ state : chr [1:746] "CA" "CA" "CA" "CA" ...
## $ major_city : chr [1:746] "Santa Rosa" "Placerville" "San Leandro" "Novato" ...
## $ time_censoring : num [1:746] 0.063 0.011 3.392 3.184 1.681 ...
## $ time_death : num [1:746] 0.063 NA NA 3.184 NA ...
## $ time_tx : num [1:746] NA NA NA NA NA NA NA NA NA NA ...
## $ time_deathORtx : num [1:746] 0.063 NA NA 3.184 NA ...
## $ time_DeathTxCensor: num [1:746] 0.063 0.011 3.392 3.184 1.681 ...
## $ PM_5yrPreCensor : num [1:746] 7.62 5.87 10.82 8.52 5.17 ...
## $ PM_5yrPreDx : num [1:746] 7.62 5.87 9.72 8.27 5.48 ...
## $ PM5yrCensor_dich : chr [1:746] "Low" "Low" "High" "High" ...
## $ PM5yr_dich : chr [1:746] "Low" "Low" "High" "High" ...
## $ dx_IPF : Factor w/ 1 level "IPF": 1 1 1 1 1 1 1 1 1 1 ...
## $ ruca : num [1:746] 1 4 1 1 4 1 1 1 1 1 ...
## $ metro : chr [1:746] "metropolitan" "micropolitan" "metropolitan" "metropolitan" ...
## $ site : chr [1:746] "02R" "02R" "02R" "02R" ...
## $ PM_date : Date[1:746], format: "2000-01-01" "2000-01-01" ...
## $ PM_5yrPreSamp : num [1:746] 7.62 5.87 9.28 7.97 5.48 ...
## $ PM_1yrPreSamp : num [1:746] 6.31 4.67 8.3 6.4 4.58 ...
## $ PM_6moPreSamp : num [1:746] 6.6 4.43 9.02 6 5.77 ...
## $ PM_3moPreSamp : num [1:746] 5.63 5.03 9.43 6.27 6.33 ...
## $ PM_1moPreSamp : num [1:746] 6.35 5.25 8.55 6.15 6.7 ...
## $ SO4_5yrPreSamp : num [1:746] 0.673 0.475 0.767 0.69 0.448 ...
## $ SO4_1yrPreSamp : num [1:746] 0.475 0.333 0.708 0.508 0.358 ...
## $ SO4_6moPreSamp : num [1:746] 0.417 0.367 0.5 0.6 0.467 ...
## $ SO4_3moPreSamp : num [1:746] 0.567 0.467 0.433 0.867 0.4 ...
## $ SO4_1moPreSamp : num [1:746] 0.75 0.5 0.5 0.9 0.35 ...
## $ NO3_5yrPreSamp : num [1:746] 0.923 0.545 1.563 1.032 0.307 ...
## $ NO3_1yrPreSamp : num [1:746] 0.8 0.408 1.183 0.825 0.225 ...
## $ NO3_6moPreSamp : num [1:746] 0.883 0.35 1.433 0.717 0.233 ...
## $ NO3_3moPreSamp : num [1:746] 0.533 0.233 1.6 0.6 0.3 ...
## $ NO3_1moPreSamp : num [1:746] 0.6 0.25 1.4 0.6 0.35 ...
## $ NH4_5yrPreSamp : num [1:746] 0.143 0.225 0.305 0.242 0.232 ...
## $ NH4_1yrPreSamp : num [1:746] 0.0583 0.15 0.2417 0.1417 0.2333 ...
## $ NH4_6moPreSamp : num [1:746] 0.05 0.1167 0.3167 0.0833 0.2333 ...
## $ NH4_3moPreSamp : num [1:746] 0.0333 0.0333 0.4 0.0667 0.1667 ...
## $ NH4_1moPreSamp : num [1:746] 0.05 0 0.3 0.05 0.2 ...
## $ BC_5yrPreSamp : num [1:746] 0.738 0.342 0.715 0.557 0.328 ...
## $ BC_1yrPreSamp : num [1:746] 0.575 0.3 0.525 0.517 0.225 ...
## $ BC_6moPreSamp : num [1:746] 0.667 0.2 0.6 0.383 0.283 ...
## $ BC_3moPreSamp : num [1:746] 0.367 0.233 0.667 0.333 0.367 ...
## $ BC_1moPreSamp : num [1:746] 0.35 0.25 0.55 0.35 0.45 ...
## $ OM_5yrPreSamp : num [1:746] 3.95 2.08 3.5 2.62 2.19 ...
## $ OM_1yrPreSamp : num [1:746] 3.08 2.02 3.12 2.46 2.02 ...
## $ OM_6moPreSamp : num [1:746] 3.52 1.47 3.8 2.1 2.9 ...
## $ OM_3moPreSamp : num [1:746] 2.1 1.93 4.23 2.13 4.1 ...
## $ OM_1moPreSamp : num [1:746] 2.1 2.35 3.55 2.25 4.9 ...
## $ SS_5yrPreSamp : num [1:746] 1.345 0.2567 1.8717 1.69 0.0717 ...
## $ SS_1yrPreSamp : num [1:746] 1.1 0.3083 1.7833 1.5583 0.0667 ...
## $ SS_6moPreSamp : num [1:746] 1.5167 0.35 1.1333 2.0667 0.0667 ...
## $ SS_3moPreSamp : num [1:746] 2.0667 0.3 1.2333 2 0.0667 ...
## $ SS_1moPreSamp : num [1:746] 2.3 0.25 0.9 1.85 0 ...
## $ Soil_5yrPreSamp : num [1:746] 0.443 0.39 0.475 0.353 0.512 ...
## $ Soil_1yrPreSamp : num [1:746] 0.433 0.275 0.467 0.317 0.525 ...
## $ Soil_6moPreSamp : num [1:746] 0.383 0.317 0.5 0.333 0.8 ...
## $ Soil_3moPreSamp : num [1:746] 0.4 0.433 0.267 0.433 1.033 ...
## $ Soil_1moPreSamp : num [1:746] 0.5 0.5 0.25 0.4 1.25 ...
## - attr(*, "groups")= tibble [746 × 2] (S3: tbl_df/tbl/data.frame)
## ..$ ID : num [1:746] 513 514 515 516 517 519 520 521 523 524 ...
## ..$ .rows: list<int> [1:746]
## .. ..$ : int 1
## .. ..$ : int 2
## .. ..$ : int 3
## .. ..$ : int 4
## .. ..$ : int 5
## .. ..$ : int 6
## .. ..$ : int 7
## .. ..$ : int 8
## .. ..$ : int 9
## .. ..$ : int 10
## .. ..$ : int 11
## .. ..$ : int 12
## .. ..$ : int 13
## .. ..$ : int 14
## .. ..$ : int 15
## .. ..$ : int 16
## .. ..$ : int 17
## .. ..$ : int 18
## .. ..$ : int 19
## .. ..$ : int 20
## .. ..$ : int 21
## .. ..$ : int 22
## .. ..$ : int 23
## .. ..$ : int 24
## .. ..$ : int 25
## .. ..$ : int 26
## .. ..$ : int 27
## .. ..$ : int 28
## .. ..$ : int 29
## .. ..$ : int 30
## .. ..$ : int 31
## .. ..$ : int 32
## .. ..$ : int 33
## .. ..$ : int 34
## .. ..$ : int 35
## .. ..$ : int 36
## .. ..$ : int 37
## .. ..$ : int 38
## .. ..$ : int 39
## .. ..$ : int 40
## .. ..$ : int 41
## .. ..$ : int 42
## .. ..$ : int 43
## .. ..$ : int 44
## .. ..$ : int 45
## .. ..$ : int 46
## .. ..$ : int 47
## .. ..$ : int 48
## .. ..$ : int 49
## .. ..$ : int 50
## .. ..$ : int 51
## .. ..$ : int 52
## .. ..$ : int 53
## .. ..$ : int 54
## .. ..$ : int 55
## .. ..$ : int 56
## .. ..$ : int 57
## .. ..$ : int 58
## .. ..$ : int 59
## .. ..$ : int 60
## .. ..$ : int 61
## .. ..$ : int 62
## .. ..$ : int 63
## .. ..$ : int 64
## .. ..$ : int 65
## .. ..$ : int 66
## .. ..$ : int 67
## .. ..$ : int 68
## .. ..$ : int 69
## .. ..$ : int 70
## .. ..$ : int 71
## .. ..$ : int 72
## .. ..$ : int 73
## .. ..$ : int 74
## .. ..$ : int 75
## .. ..$ : int 76
## .. ..$ : int 77
## .. ..$ : int 78
## .. ..$ : int 79
## .. ..$ : int 80
## .. ..$ : int 81
## .. ..$ : int 82
## .. ..$ : int 83
## .. ..$ : int 84
## .. ..$ : int 85
## .. ..$ : int 86
## .. ..$ : int 87
## .. ..$ : int 88
## .. ..$ : int 89
## .. ..$ : int 90
## .. ..$ : int 91
## .. ..$ : int 92
## .. ..$ : int 93
## .. ..$ : int 94
## .. ..$ : int 95
## .. ..$ : int 96
## .. ..$ : int 97
## .. ..$ : int 98
## .. ..$ : int 99
## .. .. [list output truncated]
## .. ..@ ptype: int(0)
## ..- attr(*, ".drop")= logi TRUE
Need to know if consent_date is a reasonable time point to be basing our DNAm results from
dnam <- dnam %>% mutate(time_sample=(sample_date-consent_date)/365.25)
dnam$time_sample <- as.numeric(dnam$time_sample)
Will need to use the actual proportion of 5mC for beta regression analyses where values must be between 0 and 1
dnam <- dnam %>% mutate(prop_5mC=pct_5mC/100)
This is a function that allows me to make tables which summarize the count and percentages of each level of factor variables
n_prop_tbl <- function(x) {
tbl <- table(x)
res <- cbind(tbl, round(prop.table(tbl)*100,2))
colnames(res) <- c('Count', 'Percentage')
res
}
print("PM2.5")
## [1] "PM2.5"
summary(dnam$PM_5yrPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 3.053 7.395 8.280 8.237 9.098 16.142 1
summary(dnam$PM_1yrPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 2.642 6.708 7.608 7.526 8.308 15.175 1
summary(dnam$PM_6moPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 2.500 6.600 7.617 7.571 8.500 17.967 22
summary(dnam$PM_3moPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.967 6.367 7.433 7.506 8.500 20.200 12
summary(dnam$PM_1moPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.900 6.300 7.350 7.525 8.700 23.000 6
print("/nSO4")
## [1] "/nSO4"
summary(dnam$SO4_5yrPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.280 1.233 1.542 1.444 1.733 2.445 1
summary(dnam$SO4_1yrPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.1750 0.8833 1.1333 1.0955 1.2875 1.9333 5
summary(dnam$SO4_6moPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.150 0.850 1.100 1.063 1.283 1.950 40
summary(dnam$SO4_3moPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.1667 0.8333 1.0667 1.0615 1.3000 2.1333 125
summary(dnam$SO4_1moPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.10 0.80 1.05 1.06 1.30 2.25 143
print("/nNO3")
## [1] "/nNO3"
summary(dnam$NO3_5yrPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.07167 0.49000 0.83667 0.90407 1.31042 4.26333 1
summary(dnam$NO3_1yrPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0750 0.4000 0.6583 0.7423 1.0083 3.4667 5
summary(dnam$NO3_6moPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0800 0.4000 0.6000 0.7996 0.9833 5.5000 40
summary(dnam$NO3_3moPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.3000 0.5333 0.7497 0.9000 7.8000 125
summary(dnam$NO3_1moPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.3000 0.5000 0.7328 0.9000 7.4000 143
print("/nNH4")
## [1] "/nNH4"
summary(dnam$NH4_5yrPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.02833 0.32167 0.45500 0.47358 0.60833 1.38000 1
summary(dnam$NH4_1yrPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.008333 0.169231 0.250000 0.274247 0.358333 1.066667 5
summary(dnam$NH4_6moPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.1667 0.2500 0.2849 0.3500 1.9667 40
summary(dnam$NH4_3moPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.1333 0.2333 0.2700 0.3333 2.5000 125
summary(dnam$NH4_1moPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.1500 0.2000 0.2623 0.3500 2.2000 143
print("/nBC")
## [1] "/nBC"
summary(dnam$BC_5yrPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.1317 0.4967 0.6133 0.6134 0.7167 1.9650 1
summary(dnam$BC_1yrPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.08461 0.47500 0.60000 0.59388 0.70833 1.28571 5
summary(dnam$BC_6moPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0500 0.4667 0.6000 0.6240 0.7500 1.8250 40
summary(dnam$BC_3moPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.03333 0.43333 0.60000 0.58884 0.73333 1.90000 125
summary(dnam$BC_1moPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.4000 0.6000 0.5854 0.7500 2.0000 143
print("/nOM")
## [1] "/nOM"
summary(dnam$OM_5yrPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.7333 2.2650 2.7333 2.8295 3.4267 6.9350 1
summary(dnam$OM_1yrPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.8077 2.3500 2.9583 3.0226 3.5667 8.7444 5
summary(dnam$OM_6moPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.3167 2.3500 3.0333 3.1523 3.8000 12.6250 40
summary(dnam$OM_3moPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.300 2.200 2.967 3.028 3.700 10.133 125
summary(dnam$OM_1moPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.200 2.142 2.850 3.042 3.750 10.500 143
print("/nSS")
## [1] "/nSS"
summary(dnam$SS_5yrPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.01333 0.18833 0.24167 0.35948 0.36833 2.52034 1
summary(dnam$SS_1yrPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.1833 0.2444 0.3898 0.3923 3.2714 5
summary(dnam$SS_6moPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.1333 0.2333 0.3821 0.4167 4.3667 40
summary(dnam$SS_3moPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.1333 0.2333 0.3786 0.4333 4.9333 125
summary(dnam$SS_1moPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.1000 0.2000 0.3902 0.4500 5.5000 143
print("/nSoil")
## [1] "/nSoil"
summary(dnam$Soil_5yrPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.06833 0.35500 0.49333 0.60401 0.72679 2.42833 1
summary(dnam$Soil_1yrPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.05833 0.34167 0.51667 0.58605 0.72500 2.39167 5
summary(dnam$Soil_6moPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.03333 0.31667 0.45833 0.57793 0.68333 2.95000 40
summary(dnam$Soil_3moPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.03333 0.30000 0.46667 0.59090 0.66667 5.36667 125
summary(dnam$Soil_1moPreSamp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 0.300 0.450 0.593 0.700 7.100 143
n_prop_tbl(dnam$sex)
## Count Percentage
## Male 559 74.93
## Female 187 25.07
n_prop_tbl(dnam$race)
## Count Percentage
## W 706 94.64
## B 7 0.94
## A 20 2.68
## U 13 1.74
n_prop_tbl(dnam$ethnicity)
## Count Percentage
## N 702 94.10
## H 29 3.89
## U 15 2.01
n_prop_tbl(dnam$smokeHx)
## Count Percentage
## Never 267 35.79
## Ever 479 64.21
n_prop_tbl(dnam$dx_group)
## Count Percentage
## IPF 746 100
n_prop_tbl(dnam$state)
## Count Percentage
## AL 59 7.92
## AR 2 0.27
## AZ 29 3.89
## CA 57 7.65
## CO 7 0.94
## CT 21 2.82
## DC 3 0.40
## DE 3 0.40
## FL 21 2.82
## GA 36 4.83
## IA 2 0.27
## ID 2 0.27
## IL 22 2.95
## IN 6 0.81
## KS 7 0.94
## KY 15 2.01
## LA 17 2.28
## MA 9 1.21
## MD 18 2.42
## ME 1 0.13
## MI 45 6.04
## MN 29 3.89
## MO 14 1.88
## MS 12 1.61
## MT 1 0.13
## NC 23 3.09
## ND 1 0.13
## NE 1 0.13
## NH 6 0.81
## NJ 14 1.88
## NM 3 0.40
## NV 3 0.40
## NY 63 8.46
## OH 28 3.76
## PA 41 5.50
## SC 14 1.88
## SD 1 0.13
## TN 14 1.88
## TX 50 6.71
## UT 6 0.81
## VA 35 4.70
## WI 2 0.27
## WV 2 0.27
n_prop_tbl(dnam$metro)
## Count Percentage
## metropolitan 627 84.16
## micropolitan 63 8.46
## rural 55 7.38
Overwhelming majority of the patients are considered “metropolitan”.
shapiro.test(dnam$age_dx)
##
## Shapiro-Wilk normality test
##
## data: dnam$age_dx
## W = 0.96994, p-value = 2.961e-11
#Shapiro tests tells us that age_dx is not normally distributed, so should report median, IQR
summary(dnam$age_dx)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.59 64.15 69.37 68.69 73.95 91.78
Not normally distributed.
n_prop_tbl(dnam$status)
## Count Percentage
## 0 429 57.51
## 1 213 28.55
## 2 104 13.94
shapiro.test(dnam$fvc_pct)
##
## Shapiro-Wilk normality test
##
## data: dnam$fvc_pct
## W = 0.99413, p-value = 0.009972
summary(dnam$fvc_pct)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 23.55 55.28 67.10 68.05 79.70 120.43 68
sd(dnam$fvc_pct, na.rm=T)
## [1] 16.6119
shapiro.test(dnam$dlco_pct)
##
## Shapiro-Wilk normality test
##
## data: dnam$dlco_pct
## W = 0.92974, p-value < 2.2e-16
summary(dnam$dlco_pct)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 8.51 30.12 39.60 40.74 49.39 168.98 108
sd(dnam$dlco_pct, na.rm=T)
## [1] 15.33611
summary(dnam$time_DeathTxCensor)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 1.317 2.359 2.323 3.330 5.060
sd(dnam$time_DeathTxCensor, na.rm=T)
## [1] 1.298347
summary(dnam$time_sample)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.00137 0.00000 0.00000 0.01827 0.00000 1.39904
sd(dnam$time_sample, na.rm=T)
## [1] 0.1428165
This indicates that the majority of samples are taken very close to the time of consent, which is our reference for the _5yrPreDx periods for exposure matching, although there are a few where samples were taken far before or far after the consent_date.
Histogram of %5mC
(dnam %>% ggplot(aes(x=pct_5mC))+
geom_histogram(fill="blue", color="black")+
labs(x="% 5mC", y="Number of patients with IPF", title="% 5mC in Patients with IPF")+
theme(plot.title = element_text(hjust = 0.5)))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
We have some very high outliers, whreas the rest is generally <0.3%
Histogram of %5mC
(dnam %>% ggplot(aes(x=pct_5mC))+
geom_histogram(fill="blue", color="black")+
labs(x="% 5mC", y="Number of patients with IPF", title="% 5mC in Patients with IPF")+
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,0.5))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 10 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).
Slightly right-skewed distribution.
Scatterplot of continuous PM2.5 5yrs Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=PM_5yrPreSamp, y=pct_5mC))+
geom_point()+
labs(x="PM2.5 in 5yrs Pre-Sampling", y="% 5mC", title="%5mC by PM2.5 5yrs Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,20)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).
Scatterplot of continuous PM2.5 1yr Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=PM_1yrPreSamp, y=pct_5mC))+
geom_point()+
labs(x="PM2.5 in 1yr Pre-Sampling", y="% 5mC", title="%5mC by PM2.5 1yr Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,20)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).
Scatterplot of continuous PM2.5 6mo Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=PM_6moPreSamp, y=pct_5mC))+
geom_point()+
labs(x="PM2.5 in 6mo Pre-Sampling", y="% 5mC", title="%5mC by PM2.5 6mo Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,20)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 31 rows containing non-finite values (stat_smooth).
## Warning: Removed 31 rows containing missing values (geom_point).
Scatterplot of continuous PM2.5 3mo Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=PM_3moPreSamp, y=pct_5mC))+
geom_point()+
labs(x="PM2.5 in 3mo Pre-Sampling", y="% 5mC", title="%5mC by PM2.5 3mo Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,20)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 23 rows containing non-finite values (stat_smooth).
## Warning: Removed 23 rows containing missing values (geom_point).
Scatterplot of continuous PM2.5 1mo Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=PM_1moPreSamp, y=pct_5mC))+
geom_point()+
labs(x="PM2.5 in 1mo Pre-Sampling", y="% 5mC", title="%5mC by PM2.5 1mo Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,20)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).
Scatterplot of continuous SO4 5yrs Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=SO4_5yrPreSamp, y=pct_5mC))+
geom_point()+
labs(x="SO4 in 5yrs Pre-Sampling", y="% 5mC", title="%5mC by SO4 5yrs Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,2.5)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).
Scatterplot of continuous SO4 1yr Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=PM_1yrPreSamp, y=pct_5mC))+
geom_point()+
labs(x="SO4 in 1yr Pre-Sampling", y="% 5mC", title="%5mC by SO4 1yr Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,20)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).
Scatterplot of continuous SO4 6mo Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=SO4_6moPreSamp, y=pct_5mC))+
geom_point()+
labs(x="SO4 in 6mo Pre-Sampling", y="% 5mC", title="%5mC by SO4 6mo Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,2.5)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 49 rows containing non-finite values (stat_smooth).
## Warning: Removed 49 rows containing missing values (geom_point).
Scatterplot of continuous SO4 3mo Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=SO4_3moPreSamp, y=pct_5mC))+
geom_point()+
labs(x="SO4 in 3mo Pre-Sampling", y="% 5mC", title="%5mC by SO4 3mo Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,2.5)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 135 rows containing non-finite values (stat_smooth).
## Warning: Removed 135 rows containing missing values (geom_point).
Scatterplot of continuous SO4 1mo Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=SO4_1moPreSamp, y=pct_5mC))+
geom_point()+
labs(x="SO4 in 1mo Pre-Sampling", y="% 5mC", title="%5mC by SO4 1mo Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,2.5)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 153 rows containing non-finite values (stat_smooth).
## Warning: Removed 153 rows containing missing values (geom_point).
Scatterplot of continuous NO3 5yrs Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=NO3_5yrPreSamp, y=pct_5mC))+
geom_point()+
labs(x="NO3 in 5yrs Pre-Sampling", y="% 5mC", title="%5mC by NO3 5yrs Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,5)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).
Scatterplot of continuous NO3 1yr Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=NO3_1yrPreSamp, y=pct_5mC))+
geom_point()+
labs(x="NO3 in 1yrs Pre-Sampling", y="% 5mC", title="%5mC by NO3 1yrs Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,5)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).
Scatterplot of continuous NO3 6mo Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=NO3_6moPreSamp, y=pct_5mC))+
geom_point()+
labs(x="NO3 in 6mo Pre-Sampling", y="% 5mC", title="%5mC by NO3 6mo Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,5)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 50 rows containing non-finite values (stat_smooth).
## Warning: Removed 50 rows containing missing values (geom_point).
Scatterplot of continuous NO3 3mo Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=NO3_3moPreSamp, y=pct_5mC))+
geom_point()+
labs(x="NO3 in 3mo Pre-Sampling", y="% 5mC", title="%5mC by NO3 3mo Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,5)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 137 rows containing non-finite values (stat_smooth).
## Warning: Removed 137 rows containing missing values (geom_point).
Scatterplot of continuous NO3 1mo Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=NO3_1moPreSamp, y=pct_5mC))+
geom_point()+
labs(x="NO3 in 1mo Pre-Sampling", y="% 5mC", title="%5mC by NO3 1mo Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,5)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 154 rows containing non-finite values (stat_smooth).
## Warning: Removed 154 rows containing missing values (geom_point).
Scatterplot of continuous NH4 5yrs Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=NH4_5yrPreSamp, y=pct_5mC))+
geom_point()+
labs(x="NH4 in 5yrs Pre-Sampling", y="% 5mC", title="%5mC by NH4 5yrs Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0, 1.5)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).
Scatterplot of continuous NH4 1yr Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=NH4_1yrPreSamp, y=pct_5mC))+
geom_point()+
labs(x="NH4 in 1yr Pre-Sampling", y="% 5mC", title="%5mC by NH4 1yr Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0, 1.5)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).
Scatterplot of continuous NH4 6mo Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=NH4_6moPreSamp, y=pct_5mC))+
geom_point()+
labs(x="NH4 in 6mo Pre-Sampling", y="% 5mC", title="%5mC by NH4 6mo Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,2)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 49 rows containing non-finite values (stat_smooth).
## Warning: Removed 49 rows containing missing values (geom_point).
Scatterplot of continuous NH4 3mo Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=NH4_3moPreSamp, y=pct_5mC))+
geom_point()+
labs(x="NH4 in 3mo Pre-Sampling", y="% 5mC", title="%5mC by NH4 3mo Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,2)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 136 rows containing non-finite values (stat_smooth).
## Warning: Removed 136 rows containing missing values (geom_point).
Scatterplot of continuous NH4 1mo Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=NH4_1moPreSamp, y=pct_5mC))+
geom_point()+
labs(x="NH4 in 1mo Pre-Sampling", y="% 5mC", title="%5mC by NH4 1mo Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,2)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 154 rows containing non-finite values (stat_smooth).
## Warning: Removed 154 rows containing missing values (geom_point).
Scatterplot of continuous BC 5yrs Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=BC_5yrPreSamp, y=pct_5mC))+
geom_point()+
labs(x="BC in 5yrs Pre-Sampling", y="% 5mC", title="%5mC by BC 5yrs Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0, 2)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).
Scatterplot of continuous BC 1yr Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=BC_1yrPreSamp, y=pct_5mC))+
geom_point()+
labs(x="BC in 1yr Pre-Sampling", y="% 5mC", title="%5mC by BC 1yr Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0, 2)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).
Scatterplot of continuous BC 6mo Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=BC_6moPreSamp, y=pct_5mC))+
geom_point()+
labs(x="BC in 6mo Pre-Sampling", y="% 5mC", title="%5mC by BC 6mo Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,2)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 49 rows containing non-finite values (stat_smooth).
## Warning: Removed 49 rows containing missing values (geom_point).
Scatterplot of continuous BC 3mo Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=BC_3moPreSamp, y=pct_5mC))+
geom_point()+
labs(x="BC in 3mo Pre-Sampling", y="% 5mC", title="%5mC by BC 3mo Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,2)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 135 rows containing non-finite values (stat_smooth).
## Warning: Removed 135 rows containing missing values (geom_point).
Scatterplot of continuous BC 1mo Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=BC_1moPreSamp, y=pct_5mC))+
geom_point()+
labs(x="BC in 1mo Pre-Sampling", y="% 5mC", title="%5mC by BC 1mo Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,2)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 153 rows containing non-finite values (stat_smooth).
## Warning: Removed 153 rows containing missing values (geom_point).
Scatterplot of continuous OM 5yrs Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=OM_5yrPreSamp, y=pct_5mC))+
geom_point()+
labs(x="OM in 5yrs Pre-Sampling", y="% 5mC", title="%5mC by OM 5yrs Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0, 8)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).
Scatterplot of continuous OM 1yr Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=OM_1yrPreSamp, y=pct_5mC))+
geom_point()+
labs(x="OM in 1yr Pre-Sampling", y="% 5mC", title="%5mC by OM 1yr Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0, 8)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing missing values (geom_point).
Scatterplot of continuous OM 6mo Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=OM_6moPreSamp, y=pct_5mC))+
geom_point()+
labs(x="OM in 6mo Pre-Sampling", y="% 5mC", title="%5mC by OM 6mo Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,10)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 50 rows containing non-finite values (stat_smooth).
## Warning: Removed 50 rows containing missing values (geom_point).
Scatterplot of continuous OM 3mo Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=OM_3moPreSamp, y=pct_5mC))+
geom_point()+
labs(x="OM in 3mo Pre-Sampling", y="% 5mC", title="%5mC by OM 3mo Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,10)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 136 rows containing non-finite values (stat_smooth).
## Warning: Removed 136 rows containing missing values (geom_point).
Scatterplot of continuous OM 1mo Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=OM_1moPreSamp, y=pct_5mC))+
geom_point()+
labs(x="OM in 1mo Pre-Sampling", y="% 5mC", title="%5mC by OM 1mo Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,10)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 154 rows containing non-finite values (stat_smooth).
## Warning: Removed 154 rows containing missing values (geom_point).
Scatterplot of continuous SS 5yrs Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=SS_5yrPreSamp, y=pct_5mC))+
geom_point()+
labs(x="SS in 5yrs Pre-Sampling", y="% 5mC", title="%5mC by SS 5yrs Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0, 2.5)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).
Scatterplot of continuous SS 1yr Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=SS_1yrPreSamp, y=pct_5mC))+
geom_point()+
labs(x="SS in 1yr Pre-Sampling", y="% 5mC", title="%5mC by SS 1yr Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0, 2.5)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 25 rows containing non-finite values (stat_smooth).
## Warning: Removed 25 rows containing missing values (geom_point).
Scatterplot of continuous SS 6mo Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=SS_6moPreSamp, y=pct_5mC))+
geom_point()+
labs(x="SS in 6mo Pre-Sampling", y="% 5mC", title="%5mC by SS 6mo Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,5)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 49 rows containing non-finite values (stat_smooth).
## Warning: Removed 49 rows containing missing values (geom_point).
Scatterplot of continuous SS 3mo Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=SS_3moPreSamp, y=pct_5mC))+
geom_point()+
labs(x="SS in 3mo Pre-Sampling", y="% 5mC", title="%5mC by SS 3mo Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,5)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 135 rows containing non-finite values (stat_smooth).
## Warning: Removed 135 rows containing missing values (geom_point).
Scatterplot of continuous SS 1mo Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=SS_1moPreSamp, y=pct_5mC))+
geom_point()+
labs(x="SS in 1mo Pre-Sampling", y="% 5mC", title="%5mC by SS 1mo Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,5)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 154 rows containing non-finite values (stat_smooth).
## Warning: Removed 154 rows containing missing values (geom_point).
Scatterplot of continuous Soil 5yrs Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=Soil_5yrPreSamp, y=pct_5mC))+
geom_point()+
labs(x="Soil in 5yrs Pre-Sampling", y="% 5mC", title="%5mC by Soil 5yrs Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0, 3)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).
Scatterplot of continuous Soil 1yr Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=Soil_1yrPreSamp, y=pct_5mC))+
geom_point()+
labs(x="Soil in 1yr Pre-Sampling", y="% 5mC", title="%5mC by Soil 1yr Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0, 3)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).
Scatterplot of continuous Soil 6mo Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=Soil_6moPreSamp, y=pct_5mC))+
geom_point()+
labs(x="Soil in 6mo Pre-Sampling", y="% 5mC", title="%5mC by Soil 6mo Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,3)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 49 rows containing non-finite values (stat_smooth).
## Warning: Removed 49 rows containing missing values (geom_point).
Scatterplot of continuous Soil 3mo Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=Soil_3moPreSamp, y=pct_5mC))+
geom_point()+
labs(x="Soil in 3mo Pre-Sampling", y="% 5mC", title="%5mC by Soil 3mo Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,3)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 136 rows containing non-finite values (stat_smooth).
## Warning: Removed 136 rows containing missing values (geom_point).
Scatterplot of continuous Soil 1mo Pre-Sampling vs %5mC
(dnam %>% ggplot(aes(x=Soil_1moPreSamp, y=pct_5mC))+
geom_point()+
labs(x="Soil in 1mo Pre-Sampling", y="% 5mC", title="%5mC by Soil 1mo Pre-Sampling")+
geom_smooth(method="lm", se = FALSE) +
theme(plot.title = element_text(hjust = 0.5))+
xlim(0,3)+
ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 155 rows containing non-finite values (stat_smooth).
## Warning: Removed 155 rows containing missing values (geom_point).
Violin plot wrapping boxplot to visualize NO3 5yrs Pre-Sampling low vs high vs %5mC
#dnamb <- dnam %>% filter(!is.na(NO35yrCensor_dich))
#(dnamb %>% ggplot(aes(x=NO35yrCensor_dich, y=pct_5mC, fill=NO35yrCensor_dich))+
# geom_boxplot(width=0.2, color="black", alpha=1.0)+
# geom_violin(width=1.0, alpha=0.5)+
# labs(x="NO3 5yrs Pre-Sampling Low vs High", y="% 5mC", title="% 5mC by NO3 5yrs Pre-Sampling Low vs High")+
# theme_light()+
# theme(legend.position = "none", plot.title = element_text(hjust = 0.5))+
# scale_fill_brewer(type="seq", palette="YlOrRd")+
# ylim(0,0.25))
Violin plot wrapping boxplot to visualize NO3 5yrs pre-dx low vs high vs %5mC
#dnamb <- dnam %>% filter(!is.na(NO35yr_dich))
#(dnamb %>% ggplot(aes(x=NO35yr_dich, y=pct_5mC, fill=NO35yr_dich))+
# geom_boxplot(width=0.2, color="black", alpha=1.0)+
# geom_violin(width=1.0, alpha=0.5)+
# labs(x="NO3 1yr Pre-Sampling Low vs High", y="% 5mC", title="% 5mC by NO3 1yr Pre-Sampling Low vs High")+
# theme_light()+
# theme(legend.position = "none", plot.title = element_text(hjust = 0.5))+
# scale_fill_brewer(type="seq", palette="YlOrRd")+
# ylim(0,0.25))
Violin plot wrapping boxplot to visualize NH4 5yrs Pre-Sampling low vs high vs %5mC
#dnamb <- dnam %>% filter(!is.na(NH45yrCensor_dich))
#(dnamb %>% ggplot(aes(x=NH45yrCensor_dich, y=pct_5mC, fill=NH45yrCensor_dich))+
# geom_boxplot(width=0.2, color="black", alpha=1.0)+
# geom_violin(width=1.0, alpha=0.5)+
# labs(x="NH4 5yrs Pre-Sampling Low vs High", y="% 5mC", title="% 5mC by NH4 5yrs Pre-Sampling Low vs High")+
# theme_light()+
# theme(legend.position = "none", plot.title = element_text(hjust = 0.5))+
# scale_fill_brewer(type="seq", palette="YlOrRd")+
# ylim(0,0.25))
Violin plot wrapping boxplot to visualize NH4 5yrs pre-dx low vs high vs %5mC
#dnamb <- dnam %>% filter(!is.na(NH45yr_dich))
#(dnamb %>% ggplot(aes(x=NH45yr_dich, y=pct_5mC, fill=NH45yr_dich))+
# geom_boxplot(width=0.2, color="black", alpha=1.0)+
# geom_violin(width=1.0, alpha=0.5)+
# labs(x="NH4 1yr Pre-Sampling Low vs High", y="% 5mC", title="% 5mC by NH4 1yr Pre-Sampling Low vs High")+
# theme_light()+
# theme(legend.position = "none", plot.title = element_text(hjust = 0.5))+
# scale_fill_brewer(type="seq", palette="YlOrRd")+
# ylim(0,0.25))
###Continuous PM2.5 5yr Pre-Censor
dnam_model1 <- betareg(prop_5mC ~ PM_5yrPreSamp, dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ PM_5yrPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0934 -0.3967 -0.0721 0.2850 5.3443
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.81077 0.11701 -58.209 <2e-16 ***
## PM_5yrPreSamp 0.01314 0.01389 0.946 0.344
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2142.6 115.4 18.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4401 on 3 Df
## Pseudo R-squared: 0.002777
## Number of iterations: 215 (BFGS) + 5 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -7.04010171 -6.58144546
## PM_5yrPreSamp -0.01407672 0.04036428
## (phi) 1916.52576149 2368.69237872
Impact of sex on model
dnam_model1 <- betareg(prop_5mC ~ PM_5yrPreSamp + sex, dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ PM_5yrPreSamp + sex, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0824 -0.4035 -0.0728 0.2854 5.3383
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.81827 0.11803 -57.768 <2e-16 ***
## PM_5yrPreSamp 0.01338 0.01389 0.963 0.336
## sexFemale 0.02206 0.04717 0.468 0.640
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2143.3 115.4 18.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4401 on 4 Df
## Pseudo R-squared: 0.003426
## Number of iterations: 720 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -7.04960352 -6.58693849
## PM_5yrPreSamp -0.01385524 0.04060739
## sexFemale -0.07038627 0.11450638
## (phi) 1917.12407737 2369.42710691
Impact of age_dx on model
dnam_model1 <- betareg(prop_5mC ~ PM_5yrPreSamp + age_dx, dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ PM_5yrPreSamp + age_dx, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1140 -0.3988 -0.0680 0.2808 5.3697
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.715667 0.213437 -31.464 <2e-16 ***
## PM_5yrPreSamp 0.013071 0.013889 0.941 0.347
## age_dx -0.001377 0.002589 -0.532 0.595
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2143.5 115.4 18.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4401 on 4 Df
## Pseudo R-squared: 0.003644
## Number of iterations: 515 (BFGS) + 4 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -7.133996e+00 -6.297338e+00
## PM_5yrPreSamp -1.415025e-02 4.029250e-02
## age_dx -6.450539e-03 3.697292e-03
## (phi) 1.917298e+03 2.369640e+03
Impact of smokeHx on model
dnam_model1 <- betareg(prop_5mC ~ PM_5yrPreSamp + smokeHx, dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ PM_5yrPreSamp + smokeHx, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1297 -0.4123 -0.0766 0.2778 5.3355
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.79638 0.12019 -56.547 <2e-16 ***
## PM_5yrPreSamp 0.01321 0.01389 0.951 0.342
## smokeHxEver -0.02331 0.04272 -0.546 0.585
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2143.5 115.4 18.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4401 on 4 Df
## Pseudo R-squared: 0.003651
## Number of iterations: 474 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -7.03194766 -6.56081064
## PM_5yrPreSamp -0.01401575 0.04043148
## smokeHxEver -0.10704887 0.06042998
## (phi) 1917.34421122 2369.69747431
Impact of race on model
dnam_model1 <- betareg(prop_5mC ~ PM_5yrPreSamp + dich_Race, dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ PM_5yrPreSamp + dich_Race, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0941 -0.3969 -0.0721 0.2849 5.3444
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.8108778 0.1178962 -57.770 <2e-16 ***
## PM_5yrPreSamp 0.0131611 0.0140862 0.934 0.350
## dich_RaceNon-White -0.0007183 0.0919231 -0.008 0.994
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2142.6 115.4 18.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4401 on 4 Df
## Pseudo R-squared: 0.002778
## Number of iterations: 150 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -7.0419501 -6.57980537
## PM_5yrPreSamp -0.0144473 0.04076952
## dich_RaceNon-White -0.1808843 0.17944763
## (phi) 1916.5258892 2368.69253558
Impact of urbanicity on model
dnam_model1 <- betareg(prop_5mC ~ PM_5yrPreSamp + metro, dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ PM_5yrPreSamp + metro, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1020 -0.3935 -0.0688 0.2842 5.3570
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.78465 0.12441 -54.533 <2e-16 ***
## PM_5yrPreSamp 0.01066 0.01447 0.737 0.461
## metromicropolitan -0.03226 0.07607 -0.424 0.672
## metrorural -0.03060 0.08223 -0.372 0.710
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2142.5 115.4 18.56 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4395 on 5 Df
## Pseudo R-squared: 0.003558
## Number of iterations: 271 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -7.02850159 -6.54080491
## PM_5yrPreSamp -0.01769165 0.03901512
## metromicropolitan -0.18135830 0.11684721
## metrorural -0.19177756 0.13056969
## (phi) 1916.24680205 2368.67427891
Partial model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ PM_5yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ PM_5yrPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1387 -0.4058 -0.0784 0.2810 5.3563
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.713922 0.217764 -30.831 <2e-16 ***
## PM_5yrPreSamp 0.013304 0.013896 0.957 0.338
## sexFemale 0.017245 0.047671 0.362 0.718
## age_dx -0.001293 0.002598 -0.498 0.619
## smokeHxEver -0.021535 0.043021 -0.501 0.617
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2144.8 115.5 18.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4401 on 6 Df
## Pseudo R-squared: 0.004893
## Number of iterations: 225 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -7.140731e+00 -6.287113e+00
## PM_5yrPreSamp -1.393286e-02 4.054033e-02
## sexFemale -7.618916e-02 1.106785e-01
## age_dx -6.384218e-03 3.798731e-03
## smokeHxEver -1.058546e-01 6.278466e-02
## (phi) 1.918478e+03 2.371090e+03
No significant association between PM_5yrPreSamp and prop_5mC in this model.
Complete model with age_dx, sex, smokeHx, race, urbanicity
dnam_model3 <- betareg(prop_5mC ~ PM_5yrPreSamp + sex + age_dx + smokeHx + dich_Race + metro, data=dnam)
summary(dnam_model3)
##
## Call:
## betareg(formula = prop_5mC ~ PM_5yrPreSamp + sex + age_dx + smokeHx +
## dich_Race + metro, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1533 -0.4031 -0.0802 0.2751 5.3672
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.692229 0.222963 -30.015 <2e-16 ***
## PM_5yrPreSamp 0.010820 0.014640 0.739 0.460
## sexFemale 0.016874 0.047773 0.353 0.724
## age_dx -0.001199 0.002604 -0.460 0.645
## smokeHxEver -0.023908 0.043118 -0.554 0.579
## dich_RaceNon-White -0.004606 0.092172 -0.050 0.960
## metromicropolitan -0.030009 0.076190 -0.394 0.694
## metrorural -0.035227 0.082435 -0.427 0.669
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2144.7 115.5 18.56 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4395 on 9 Df
## Pseudo R-squared: 0.005725
## Number of iterations: 156 (BFGS) + 3 (Fisher scoring)
confint(dnam_model3)
## 2.5 % 97.5 %
## (Intercept) -7.129228e+00 -6.255230e+00
## PM_5yrPreSamp -1.787368e-02 3.951281e-02
## sexFemale -7.675881e-02 1.105067e-01
## age_dx -6.301724e-03 3.904336e-03
## smokeHxEver -1.084168e-01 6.060122e-02
## dich_RaceNon-White -1.852603e-01 1.760482e-01
## metromicropolitan -1.793379e-01 1.193196e-01
## metrorural -1.967969e-01 1.263424e-01
## (phi) 1.918246e+03 2.371130e+03
No significant association between PM_5yrPreSamp and prop_5mC in this model.
dnam_model1 <- betareg(prop_5mC ~ PM_1yrPreSamp, dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ PM_1yrPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1109 -0.4071 -0.0763 0.2810 5.3473
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.79952 0.10347 -65.712 <2e-16 ***
## PM_1yrPreSamp 0.01289 0.01336 0.965 0.335
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2142.7 115.4 18.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4401 on 3 Df
## Pseudo R-squared: 0.002929
## Number of iterations: 5000 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -7.0023218 -6.5967111
## PM_1yrPreSamp -0.0133012 0.0390802
## (phi) 1916.6356029 2368.8271574
Impact of sex on model
dnam_model1 <- betareg(prop_5mC ~ PM_1yrPreSamp + sex, dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ PM_1yrPreSamp + sex, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0982 -0.3999 -0.0738 0.2811 5.3410
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.80760 0.10479 -64.961 <2e-16 ***
## PM_1yrPreSamp 0.01320 0.01338 0.987 0.324
## sexFemale 0.02266 0.04719 0.480 0.631
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2143.4 115.4 18.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4401 on 4 Df
## Pseudo R-squared: 0.003613
## Number of iterations: 285 (BFGS) + 4 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -7.01299567 -6.60220831
## PM_1yrPreSamp -0.01301943 0.03942261
## sexFemale -0.06983010 0.11514772
## (phi) 1917.26631533 2369.60167981
Impact of age_dx on model
dnam_model1 <- betareg(prop_5mC ~ PM_1yrPreSamp + age_dx, dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ PM_1yrPreSamp + age_dx, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1333 -0.4007 -0.0732 0.2817 5.3724
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.705407 0.207456 -32.322 <2e-16 ***
## PM_1yrPreSamp 0.012769 0.013366 0.955 0.339
## age_dx -0.001358 0.002589 -0.524 0.600
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2143.6 115.4 18.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4401 on 4 Df
## Pseudo R-squared: 0.003753
## Number of iterations: 357 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -7.112013e+00 -6.298801e+00
## PM_1yrPreSamp -1.342809e-02 3.896698e-02
## age_dx -6.432495e-03 3.717159e-03
## (phi) 1.917389e+03 2.369752e+03
Impact of smokeHx on model
dnam_model1 <- betareg(prop_5mC ~ PM_1yrPreSamp + smokeHx, dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ PM_1yrPreSamp + smokeHx, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1493 -0.4087 -0.0810 0.2831 5.3387
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.78488 0.10702 -63.397 <2e-16 ***
## PM_1yrPreSamp 0.01291 0.01336 0.966 0.334
## smokeHxEver -0.02308 0.04273 -0.540 0.589
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2143.6 115.4 18.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4401 on 4 Df
## Pseudo R-squared: 0.003806
## Number of iterations: 274 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.99464424 -6.57512435
## PM_1yrPreSamp -0.01328436 0.03910143
## smokeHxEver -0.10681696 0.06066362
## (phi) 1917.43483409 2369.80859211
Impact of race on model
dnam_model1 <- betareg(prop_5mC ~ PM_1yrPreSamp + dich_Race, dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ PM_1yrPreSamp + dich_Race, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1111 -0.4072 -0.0763 0.2810 5.3474
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.7995895 0.1041294 -65.299 <2e-16 ***
## PM_1yrPreSamp 0.0129037 0.0135434 0.953 0.341
## dich_RaceNon-White -0.0006357 0.0918550 -0.007 0.994
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2142.7 115.4 18.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4401 on 4 Df
## Pseudo R-squared: 0.00293
## Number of iterations: 177 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -7.00367933 -6.59549963
## PM_1yrPreSamp -0.01364075 0.03944825
## dich_RaceNon-White -0.18066813 0.17939669
## (phi) 1916.63568858 2368.82726247
Impact of urbanicity on model
dnam_model1 <- betareg(prop_5mC ~ PM_1yrPreSamp + metro, dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ PM_1yrPreSamp + metro, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1177 -0.3922 -0.0746 0.2870 5.3590
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.77512 0.11059 -61.265 <2e-16 ***
## PM_1yrPreSamp 0.01039 0.01395 0.745 0.456
## metromicropolitan -0.03171 0.07618 -0.416 0.677
## metrorural -0.03040 0.08224 -0.370 0.712
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2142.5 115.4 18.56 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4395 on 5 Df
## Pseudo R-squared: 0.003622
## Number of iterations: 656 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.99187109 -6.55837723
## PM_1yrPreSamp -0.01694166 0.03772986
## metromicropolitan -0.18101946 0.11759671
## metrorural -0.19159631 0.13079369
## (phi) 1916.28876554 2368.72575737
Partial model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ PM_1yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ PM_1yrPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1577 -0.3981 -0.0836 0.2781 5.3590
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.704267 0.212068 -31.614 <2e-16 ***
## PM_1yrPreSamp 0.013032 0.013384 0.974 0.330
## sexFemale 0.017901 0.047697 0.375 0.707
## age_dx -0.001271 0.002598 -0.489 0.625
## smokeHxEver -0.021241 0.043021 -0.494 0.621
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2144.9 115.5 18.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4402 on 6 Df
## Pseudo R-squared: 0.005034
## Number of iterations: 333 (BFGS) + 4 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -7.119913e+00 -6.288621e+00
## PM_1yrPreSamp -1.320018e-02 3.926347e-02
## sexFemale -7.558237e-02 1.113852e-01
## age_dx -6.363744e-03 3.821857e-03
## smokeHxEver -1.055617e-01 6.307937e-02
## (phi) 1.918578e+03 2.371213e+03
No significant association between PM_1yrPreSamp and prop_5mC in this model.
Complete model with age_dx, sex, smokeHx, race, urbanicity
dnam_model3 <- betareg(prop_5mC ~ PM_1yrPreSamp + sex + age_dx + smokeHx + dich_Race + metro, data=dnam)
summary(dnam_model3)
##
## Call:
## betareg(formula = prop_5mC ~ PM_1yrPreSamp + sex + age_dx + smokeHx +
## dich_Race + metro, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1685 -0.3993 -0.0758 0.2756 5.3692
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.683713 0.216686 -30.845 <2e-16 ***
## PM_1yrPreSamp 0.010522 0.014118 0.745 0.456
## sexFemale 0.017410 0.047792 0.364 0.716
## age_dx -0.001184 0.002605 -0.454 0.650
## smokeHxEver -0.023639 0.043119 -0.548 0.584
## dich_RaceNon-White -0.004422 0.092118 -0.048 0.962
## metromicropolitan -0.029512 0.076288 -0.387 0.699
## metrorural -0.035065 0.082441 -0.425 0.671
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2144.7 115.5 18.56 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4395 on 9 Df
## Pseudo R-squared: 0.005782
## Number of iterations: 133 (BFGS) + 2 (Fisher scoring)
confint(dnam_model3)
## 2.5 % 97.5 %
## (Intercept) -7.108411e+00 -6.259016e+00
## PM_1yrPreSamp -1.714769e-02 3.819216e-02
## sexFemale -7.626092e-02 1.110812e-01
## age_dx -6.288497e-03 3.921366e-03
## smokeHxEver -1.081502e-01 6.087158e-02
## dich_RaceNon-White -1.849689e-01 1.761255e-01
## metromicropolitan -1.790345e-01 1.200097e-01
## metrorural -1.966460e-01 1.265169e-01
## (phi) 1.918279e+03 2.371170e+03
No significant association between PM_1yrPreSamp and prop_5mC in this model.
dnam_model1 <- betareg(prop_5mC ~ PM_5yrPreSamp, dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ PM_5yrPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0934 -0.3967 -0.0721 0.2850 5.3443
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.81077 0.11701 -58.209 <2e-16 ***
## PM_5yrPreSamp 0.01314 0.01389 0.946 0.344
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2142.6 115.4 18.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4401 on 3 Df
## Pseudo R-squared: 0.002777
## Number of iterations: 215 (BFGS) + 5 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -7.04010171 -6.58144546
## PM_5yrPreSamp -0.01407672 0.04036428
## (phi) 1916.52576149 2368.69237872
No significant association between PM_5yrPreSamp and prop_5mC in this model.
Partial model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ PM_5yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ PM_5yrPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1387 -0.4058 -0.0784 0.2810 5.3563
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.713922 0.217764 -30.831 <2e-16 ***
## PM_5yrPreSamp 0.013304 0.013896 0.957 0.338
## sexFemale 0.017245 0.047671 0.362 0.718
## age_dx -0.001293 0.002598 -0.498 0.619
## smokeHxEver -0.021535 0.043021 -0.501 0.617
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2144.8 115.5 18.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4401 on 6 Df
## Pseudo R-squared: 0.004893
## Number of iterations: 225 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -7.140731e+00 -6.287113e+00
## PM_5yrPreSamp -1.393286e-02 4.054033e-02
## sexFemale -7.618916e-02 1.106785e-01
## age_dx -6.384218e-03 3.798731e-03
## smokeHxEver -1.058546e-01 6.278466e-02
## (phi) 1.918478e+03 2.371090e+03
No significant association between PM_5yrPreSamp and prop_5mC in this model.
Complete model with age_dx, sex, smokeHx, race, urbanicity
dnam_model3 <- betareg(prop_5mC ~ PM_5yrPreSamp + sex + age_dx + smokeHx + dich_Race + metro, data=dnam)
summary(dnam_model3)
##
## Call:
## betareg(formula = prop_5mC ~ PM_5yrPreSamp + sex + age_dx + smokeHx +
## dich_Race + metro, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1533 -0.4031 -0.0802 0.2751 5.3672
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.692229 0.222963 -30.015 <2e-16 ***
## PM_5yrPreSamp 0.010820 0.014640 0.739 0.460
## sexFemale 0.016874 0.047773 0.353 0.724
## age_dx -0.001199 0.002604 -0.460 0.645
## smokeHxEver -0.023908 0.043118 -0.554 0.579
## dich_RaceNon-White -0.004606 0.092172 -0.050 0.960
## metromicropolitan -0.030009 0.076190 -0.394 0.694
## metrorural -0.035227 0.082435 -0.427 0.669
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2144.7 115.5 18.56 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4395 on 9 Df
## Pseudo R-squared: 0.005725
## Number of iterations: 156 (BFGS) + 3 (Fisher scoring)
confint(dnam_model3)
## 2.5 % 97.5 %
## (Intercept) -7.129228e+00 -6.255230e+00
## PM_5yrPreSamp -1.787368e-02 3.951281e-02
## sexFemale -7.675881e-02 1.105067e-01
## age_dx -6.301724e-03 3.904336e-03
## smokeHxEver -1.084168e-01 6.060122e-02
## dich_RaceNon-White -1.852603e-01 1.760482e-01
## metromicropolitan -1.793379e-01 1.193196e-01
## metrorural -1.967969e-01 1.263424e-01
## (phi) 1.918246e+03 2.371130e+03
No significant association between PM_5yrPreSamp and prop_5mC in this model.
dnam_model1 <- betareg(prop_5mC ~ PM_6moPreSamp, dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ PM_6moPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0820 -0.3952 -0.0748 0.2728 5.3638
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.759250 0.090423 -74.751 <2e-16 ***
## PM_6moPreSamp 0.006491 0.011520 0.563 0.573
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2157.1 117.8 18.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4282 on 3 Df
## Pseudo R-squared: 0.001044
## Number of iterations: 2160 (BFGS) + 4 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.93647652 -6.5820237
## PM_6moPreSamp -0.01608737 0.0290689
## (phi) 1926.21409828 2388.0122850
No significant association between PM_6moPreSamp and prop_5mC in this model.
Partial model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ PM_6moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ PM_6moPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1575 -0.3876 -0.0801 0.2805 5.3788
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.646045 0.205864 -32.284 <2e-16 ***
## PM_6moPreSamp 0.006454 0.011550 0.559 0.576
## sexFemale 0.006335 0.048713 0.130 0.897
## age_dx -0.001412 0.002618 -0.539 0.590
## smokeHxEver -0.027536 0.043564 -0.632 0.527
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2159.5 117.9 18.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4283 on 6 Df
## Pseudo R-squared: 0.003397
## Number of iterations: 86 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -7.049531e+00 -6.242558e+00
## PM_6moPreSamp -1.618385e-02 2.909232e-02
## sexFemale -8.914068e-02 1.018113e-01
## age_dx -6.543327e-03 3.719134e-03
## smokeHxEver -1.129206e-01 5.784837e-02
## (phi) 1.928313e+03 2.390597e+03
No significant association between PM_6moPreSamp and prop_5mC in this model.
Complete model with age_dx, sex, smokeHx, race, urbanicity
dnam_model3 <- betareg(prop_5mC ~ PM_6moPreSamp + sex + age_dx + smokeHx + dich_Race + metro, data=dnam)
summary(dnam_model3)
##
## Call:
## betareg(formula = prop_5mC ~ PM_6moPreSamp + sex + age_dx + smokeHx +
## dich_Race + metro, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1572 -0.3906 -0.0763 0.2773 5.3877
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.629700 0.208498 -31.797 <2e-16 ***
## PM_6moPreSamp 0.004332 0.012006 0.361 0.718
## sexFemale 0.005841 0.048819 0.120 0.905
## age_dx -0.001314 0.002624 -0.501 0.616
## smokeHxEver -0.029603 0.043674 -0.678 0.498
## dich_RaceNon-White 0.008271 0.091605 0.090 0.928
## metromicropolitan -0.032440 0.077340 -0.419 0.675
## metrorural -0.034537 0.082502 -0.419 0.675
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2160 118 18.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4276 on 9 Df
## Pseudo R-squared: 0.004416
## Number of iterations: 158 (BFGS) + 4 (Fisher scoring)
confint(dnam_model3)
## 2.5 % 97.5 %
## (Intercept) -7.03834815 -6.221051e+00
## PM_6moPreSamp -0.01919819 2.786296e-02
## sexFemale -0.08984261 1.015238e-01
## age_dx -0.00645720 3.828396e-03
## smokeHxEver -0.11520271 5.599681e-02
## dich_RaceNon-White -0.17127119 1.878129e-01
## metromicropolitan -0.18402242 1.191431e-01
## metrorural -0.19623758 1.271636e-01
## (phi) 1928.20685526 2.390809e+03
No significant association between PM_6moPreSamp and prop_5mC in this model.
dnam_model1 <- betareg(prop_5mC ~ PM_3moPreSamp, dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ PM_3moPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0748 -0.3977 -0.0704 0.2855 5.3533
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.745942 0.081576 -82.695 <2e-16 ***
## PM_3moPreSamp 0.005387 0.010406 0.518 0.605
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2146.3 116.4 18.44 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4338 on 3 Df
## Pseudo R-squared: 0.0008507
## Number of iterations: 879 (BFGS) + 2 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.90582786 -6.58605623
## PM_3moPreSamp -0.01500861 0.02578313
## (phi) 1918.09570173 2374.43953470
No significant association between PM_3moPreSamp and prop_5mC in this model.
Partial model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ PM_3moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ PM_3moPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1420 -0.3905 -0.0779 0.2829 5.3678
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.635689 0.204009 -32.526 <2e-16 ***
## PM_3moPreSamp 0.005204 0.010431 0.499 0.618
## sexFemale 0.003833 0.048501 0.079 0.937
## age_dx -0.001333 0.002616 -0.510 0.610
## smokeHxEver -0.028777 0.043304 -0.665 0.506
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2148.5 116.5 18.44 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4338 on 6 Df
## Pseudo R-squared: 0.003115
## Number of iterations: 86 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -7.035540e+00 -6.235837e+00
## PM_3moPreSamp -1.523952e-02 2.564756e-02
## sexFemale -9.122724e-02 9.889350e-02
## age_dx -6.461128e-03 3.794611e-03
## smokeHxEver -1.136512e-01 5.609780e-02
## (phi) 1.920129e+03 2.376940e+03
No significant association between PM_3moPreSamp and prop_5mC in this model.
Complete model with age_dx, sex, smokeHx, race, urbanicity
dnam_model3 <- betareg(prop_5mC ~ PM_3moPreSamp + sex + age_dx + smokeHx + dich_Race + metro, data=dnam)
summary(dnam_model3)
##
## Call:
## betareg(formula = prop_5mC ~ PM_3moPreSamp + sex + age_dx + smokeHx +
## dich_Race + metro, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1591 -0.3939 -0.0642 0.2837 5.3792
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.620824 0.205968 -32.145 <2e-16 ***
## PM_3moPreSamp 0.003726 0.010644 0.350 0.726
## sexFemale 0.003528 0.048581 0.073 0.942
## age_dx -0.001276 0.002622 -0.487 0.627
## smokeHxEver -0.031045 0.043403 -0.715 0.474
## dich_RaceNon-White 0.005997 0.091259 0.066 0.948
## metromicropolitan -0.032043 0.076177 -0.421 0.674
## metrorural -0.042457 0.081949 -0.518 0.604
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2149.0 116.6 18.43 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4332 on 9 Df
## Pseudo R-squared: 0.004439
## Number of iterations: 144 (BFGS) + 4 (Fisher scoring)
confint(dnam_model3)
## 2.5 % 97.5 %
## (Intercept) -7.024514e+00 -6.217135e+00
## PM_3moPreSamp -1.713528e-02 2.458680e-02
## sexFemale -9.168874e-02 9.874514e-02
## age_dx -6.414744e-03 3.862837e-03
## smokeHxEver -1.161136e-01 5.402423e-02
## dich_RaceNon-White -1.728670e-01 1.848611e-01
## metromicropolitan -1.813470e-01 1.172606e-01
## metrorural -2.030742e-01 1.181609e-01
## (phi) 1.920354e+03 2.377551e+03
No significant association between PM_3moPreSamp and prop_5mC in this model.
dnam_model1 <- betareg(prop_5mC ~ PM_1moPreSamp, dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ PM_1moPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0726 -0.3971 -0.0639 0.2809 5.3385
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.722037 0.076019 -88.426 <2e-16 ***
## PM_1moPreSamp 0.002840 0.009623 0.295 0.768
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2127.3 114.9 18.51 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4369 on 3 Df
## Pseudo R-squared: 0.0002713
## Number of iterations: 613 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.8710315 -6.57304347
## PM_1moPreSamp -0.0160201 0.02170076
## (phi) 1901.9886325 2352.53440447
No significant association between PM_1moPreSamp and prop_5mC in this model.
Partial model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ PM_1moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ PM_1moPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1189 -0.3941 -0.0776 0.2718 5.3498
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.631128 0.202248 -32.787 <2e-16 ***
## PM_1moPreSamp 0.002846 0.009644 0.295 0.768
## sexFemale 0.015608 0.047845 0.326 0.744
## age_dx -0.001180 0.002620 -0.450 0.652
## smokeHxEver -0.021775 0.043234 -0.504 0.615
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2129 115 18.51 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4369 on 6 Df
## Pseudo R-squared: 0.002195
## Number of iterations: 254 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -7.027527e+00 -6.234729e+00
## PM_1moPreSamp -1.605698e-02 2.174809e-02
## sexFemale -7.816577e-02 1.093825e-01
## age_dx -6.314977e-03 3.954973e-03
## smokeHxEver -1.065119e-01 6.296159e-02
## (phi) 1.903729e+03 2.354673e+03
No significant association between PM_1moPreSamp and prop_5mC in this model.
Complete model with age_dx, sex, smokeHx, race, urbanicity
dnam_model3 <- betareg(prop_5mC ~ PM_1moPreSamp + sex + age_dx + smokeHx + dich_Race + metro, data=dnam)
summary(dnam_model3)
##
## Call:
## betareg(formula = prop_5mC ~ PM_1moPreSamp + sex + age_dx + smokeHx +
## dich_Race + metro, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1399 -0.3904 -0.0749 0.2745 5.3622
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.619314 0.203572 -32.516 <2e-16 ***
## PM_1moPreSamp 0.001641 0.009784 0.168 0.867
## sexFemale 0.015602 0.047948 0.325 0.745
## age_dx -0.001091 0.002625 -0.416 0.678
## smokeHxEver -0.024175 0.043324 -0.558 0.577
## dich_RaceNon-White 0.003114 0.091452 0.034 0.973
## metromicropolitan -0.039626 0.075655 -0.524 0.600
## metrorural -0.047253 0.081853 -0.577 0.564
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2130.1 115.2 18.5 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4363 on 9 Df
## Pseudo R-squared: 0.003964
## Number of iterations: 153 (BFGS) + 3 (Fisher scoring)
confint(dnam_model3)
## 2.5 % 97.5 %
## (Intercept) -7.018307e+00 -6.220321e+00
## PM_1moPreSamp -1.753538e-02 2.081713e-02
## sexFemale -7.837399e-02 1.095773e-01
## age_dx -6.236622e-03 4.053837e-03
## smokeHxEver -1.090879e-01 6.073764e-02
## dich_RaceNon-White -1.761295e-01 1.823570e-01
## metromicropolitan -1.879074e-01 1.086562e-01
## metrorural -2.076825e-01 1.131756e-01
## (phi) 1.904349e+03 2.355761e+03
No significant association between PM_1moPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ SO4_5yrPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ SO4_5yrPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0892 -0.3952 -0.0703 0.2762 5.3447
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.76638 0.07432 -91.045 <2e-16 ***
## SO4_5yrPreSamp 0.04424 0.04876 0.907 0.364
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2142.4 115.3 18.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4401 on 3 Df
## Pseudo R-squared: 0.002578
## Number of iterations: 5000 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.91204137 -6.6207150
## SO4_5yrPreSamp -0.05132734 0.1398149
## (phi) 1916.34227006 2368.4671320
No significant association between SO4_5yrPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ SO4_5yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ SO4_5yrPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1342 -0.4027 -0.0757 0.2774 5.3580
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.668598 0.199047 -33.503 <2e-16 ***
## SO4_5yrPreSamp 0.042210 0.048809 0.865 0.387
## sexFemale 0.015039 0.047654 0.316 0.752
## age_dx -0.001249 0.002599 -0.480 0.631
## smokeHxEver -0.020191 0.043070 -0.469 0.639
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2144.3 115.4 18.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4401 on 6 Df
## Pseudo R-squared: 0.004426
## Number of iterations: 365 (BFGS) + 4 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -7.058722e+00 -6.27847285
## SO4_5yrPreSamp -5.345490e-02 0.13787442
## sexFemale -7.836102e-02 0.10843943
## age_dx -6.342786e-03 0.00384573
## smokeHxEver -1.046065e-01 0.06422448
## (phi) 1.918030e+03 2370.54001805
No significant association between SO4_5yrPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ SO4_1yrPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ SO4_1yrPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1380 -0.3999 -0.0628 0.2688 5.3355
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.79894 0.07751 -87.713 <2e-16 ***
## SO4_1yrPreSamp 0.08866 0.06714 1.321 0.187
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2135.5 115.3 18.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4376 on 3 Df
## Pseudo R-squared: 0.005371
## Number of iterations: 1068 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.95086658 -6.6470194
## SO4_1yrPreSamp -0.04293048 0.2202559
## (phi) 1909.48632079 2361.4149580
No significant association between SO4_1yrPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ SO4_1yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ SO4_1yrPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1842 -0.3928 -0.0668 0.2749 5.3485
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.700623 0.198852 -33.696 <2e-16 ***
## SO4_1yrPreSamp 0.087698 0.067163 1.306 0.192
## sexFemale 0.015546 0.047828 0.325 0.745
## age_dx -0.001285 0.002602 -0.494 0.621
## smokeHxEver -0.020307 0.043143 -0.471 0.638
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2137.4 115.4 18.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4377 on 6 Df
## Pseudo R-squared: 0.00734
## Number of iterations: 274 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -7.090366e+00 -6.310879e+00
## SO4_1yrPreSamp -4.393938e-02 2.193344e-01
## sexFemale -7.819503e-02 1.092870e-01
## age_dx -6.384944e-03 3.814719e-03
## smokeHxEver -1.048662e-01 6.425219e-02
## (phi) 1.911242e+03 2.363572e+03
No significant association between SO4_1yrPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ SO4_6moPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ SO4_6moPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0987 -0.3929 -0.0751 0.2734 5.3065
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.76915 0.07342 -92.198 <2e-16 ***
## SO4_6moPreSamp 0.06060 0.06505 0.932 0.352
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2112.6 116.9 18.07 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4168 on 3 Df
## Pseudo R-squared: 0.003063
## Number of iterations: 5000 (BFGS) + 4 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.91304860 -6.6252482
## SO4_6moPreSamp -0.06690139 0.1880926
## (phi) 1883.39659609 2341.7071950
No significant association between SO4_6moPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ SO4_6moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ SO4_6moPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1645 -0.3944 -0.0696 0.2734 5.3213
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.659102 0.199519 -33.376 <2e-16 ***
## SO4_6moPreSamp 0.060058 0.065067 0.923 0.356
## sexFemale 0.003989 0.049412 0.081 0.936
## age_dx -0.001354 0.002645 -0.512 0.609
## smokeHxEver -0.027577 0.044334 -0.622 0.534
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2115 117 18.07 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4169 on 6 Df
## Pseudo R-squared: 0.005279
## Number of iterations: 307 (BFGS) + 4 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -7.050152e+00 -6.268051e+00
## SO4_6moPreSamp -6.747117e-02 1.875866e-01
## sexFemale -9.285614e-02 1.008334e-01
## age_dx -6.537529e-03 3.828911e-03
## smokeHxEver -1.144696e-01 5.931470e-02
## (phi) 1.885328e+03 2.344093e+03
No significant association between SO4_6moPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ SO4_3moPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ SO4_3moPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0862 -0.3801 -0.0591 0.2660 5.0351
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.68446 0.07473 -89.453 <2e-16 ***
## SO4_3moPreSamp 0.03003 0.06580 0.456 0.648
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1866.1 110.5 16.89 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3619 on 3 Df
## Pseudo R-squared: 0.0008348
## Number of iterations: 1705 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.83092254 -6.538001
## SO4_3moPreSamp -0.09894427 0.159000
## (phi) 1649.49370731 2082.620766
No significant association between SO4_3moPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ SO4_3moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ SO4_3moPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1627 -0.3992 -0.0586 0.2692 5.0393
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.6197749 0.2124773 -31.155 <2e-16 ***
## SO4_3moPreSamp 0.0306121 0.0658279 0.465 0.642
## sexFemale -0.0094719 0.0543310 -0.174 0.862
## age_dx -0.0005727 0.0028849 -0.199 0.843
## smokeHxEver -0.0374484 0.0488809 -0.766 0.444
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1868.1 110.6 16.89 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3619 on 6 Df
## Pseudo R-squared: 0.003239
## Number of iterations: 305 (BFGS) + 4 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -7.036223e+00 -6.20332715
## SO4_3moPreSamp -9.840814e-02 0.15963240
## sexFemale -1.159586e-01 0.09701486
## age_dx -6.226886e-03 0.00508153
## smokeHxEver -1.332533e-01 0.05835647
## (phi) 1.651310e+03 2084.89644145
No significant association between SO4_3moPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ SO4_1moPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ SO4_1moPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0340 -0.3976 -0.0658 0.2728 4.9454
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.646757 0.075024 -88.595 <2e-16 ***
## SO4_1moPreSamp 0.009376 0.066090 0.142 0.887
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1787.7 107.6 16.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3499 on 3 Df
## Pseudo R-squared: 8.369e-05
## Number of iterations: 465 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.7938014 -6.499712
## SO4_1moPreSamp -0.1201581 0.138911
## (phi) 1576.8984016 1998.570166
No significant association between SO4_1moPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ SO4_1moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ SO4_1moPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0669 -0.4024 -0.0671 0.2745 4.9472
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.5876885 0.2175829 -30.277 <2e-16 ***
## SO4_1moPreSamp 0.0099026 0.0661027 0.150 0.881
## sexFemale -0.0007889 0.0552074 -0.014 0.989
## age_dx -0.0005254 0.0029701 -0.177 0.860
## smokeHxEver -0.0365086 0.0504846 -0.723 0.470
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1789.6 107.7 16.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3499 on 6 Df
## Pseudo R-squared: 0.002302
## Number of iterations: 216 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -7.014143e+00 -6.161234e+00
## SO4_1moPreSamp -1.196563e-01 1.394616e-01
## sexFemale -1.089934e-01 1.074156e-01
## age_dx -6.346717e-03 5.296005e-03
## smokeHxEver -1.354567e-01 6.243944e-02
## (phi) 1.578524e+03 2.000615e+03
No significant association between SO4_1moPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ NO3_5yrPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ NO3_5yrPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.3435 -0.4032 -0.0508 0.2943 5.4147
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.76826 0.04216 -160.545 <2e-16 ***
## NO3_5yrPreSamp 0.07214 0.03853 1.872 0.0612 .
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2150.6 115.8 18.58 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4402 on 3 Df
## Pseudo R-squared: 0.01097
## Number of iterations: 5000 (BFGS) + 5 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.850886e+00 -6.6856298
## NO3_5yrPreSamp -3.376669e-03 0.1476661
## (phi) 1.923705e+03 2377.5067887
Nitrate is marginally associated with higher global DNAm in this base model.
Adjuted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ NO3_5yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ NO3_5yrPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.3222 -0.3964 -0.0593 0.2963 5.4232
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.686531 0.188612 -35.451 <2e-16 ***
## NO3_5yrPreSamp 0.071439 0.038593 1.851 0.0642 .
## sexFemale 0.017015 0.047579 0.358 0.7206
## age_dx -0.001049 0.002601 -0.403 0.6868
## smokeHxEver -0.020931 0.042964 -0.487 0.6261
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2152.5 115.9 18.58 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4403 on 6 Df
## Pseudo R-squared: 0.01275
## Number of iterations: 373 (BFGS) + 4 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -7.056204e+00 -6.316859e+00
## NO3_5yrPreSamp -4.200898e-03 1.470796e-01
## sexFemale -7.623892e-02 1.102688e-01
## age_dx -6.145934e-03 4.048761e-03
## smokeHxEver -1.051387e-01 6.327709e-02
## (phi) 1.925364e+03 2.379544e+03
Nitrate is marginally associated with higher global DNAm in this base model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ NO3_1yrPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ NO3_1yrPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.4427 -0.4082 -0.0509 0.2934 5.4263
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.77738 0.04117 -164.611 <2e-16 ***
## NO3_1yrPreSamp 0.10093 0.04508 2.239 0.0252 *
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2145.5 115.8 18.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4378 on 3 Df
## Pseudo R-squared: 0.01595
## Number of iterations: 1089 (BFGS) + 4 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.85807589 -6.6966847
## NO3_1yrPreSamp 0.01257504 0.1892856
## (phi) 1918.48789591 2372.4708814
Higher nitrate is associated with higher global DNAm in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ NO3_1yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ NO3_1yrPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.4199 -0.3957 -0.0516 0.2941 5.4336
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.7022647 0.1890792 -35.447 <2e-16 ***
## NO3_1yrPreSamp 0.1001976 0.0451771 2.218 0.0266 *
## sexFemale 0.0184402 0.0477476 0.386 0.6993
## age_dx -0.0009692 0.0026074 -0.372 0.7101
## smokeHxEver -0.0198825 0.0430671 -0.462 0.6443
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2147.3 115.9 18.53 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4378 on 6 Df
## Pseudo R-squared: 0.01763
## Number of iterations: 501 (BFGS) + 5 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -7.072853e+00 -6.331676e+00
## NO3_1yrPreSamp 1.165209e-02 1.887430e-01
## sexFemale -7.514352e-02 1.120238e-01
## age_dx -6.079582e-03 4.141149e-03
## smokeHxEver -1.042924e-01 6.452748e-02
## (phi) 1.920082e+03 2.374430e+03
Higher nitrate is associated with higher global DNAm in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ NO3_6moPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ NO3_6moPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1621 -0.3978 -0.0656 0.2902 5.3539
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.73425 0.03496 -192.612 <2e-16 ***
## NO3_6moPreSamp 0.03681 0.03184 1.156 0.248
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2114 117 18.07 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4169 on 3 Df
## Pseudo R-squared: 0.004742
## Number of iterations: 269 (BFGS) + 5 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.80277289 -6.66572146
## NO3_6moPreSamp -0.02560227 0.09921783
## (phi) 1884.73539756 2343.35973604
No assoc of nitrate with global DNAm in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ NO3_6moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ NO3_6moPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1943 -0.3965 -0.0704 0.2882 5.3698
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.623085 0.188999 -35.043 <2e-16 ***
## NO3_6moPreSamp 0.036051 0.031924 1.129 0.259
## sexFemale 0.004530 0.049428 0.092 0.927
## age_dx -0.001394 0.002649 -0.526 0.599
## smokeHxEver -0.025160 0.044374 -0.567 0.571
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2116.0 117.1 18.07 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4169 on 6 Df
## Pseudo R-squared: 0.006826
## Number of iterations: 547 (BFGS) + 6 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -6.993516e+00 -6.252654e+00
## NO3_6moPreSamp -2.651904e-02 9.862158e-02
## sexFemale -9.234657e-02 1.014075e-01
## age_dx -6.585967e-03 3.797384e-03
## smokeHxEver -1.121313e-01 6.181226e-02
## (phi) 1.886525e+03 2.345570e+03
No assoc of nitrate with global DNAm in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ NO3_3moPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ NO3_3moPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0763 -0.3982 -0.0680 0.2768 5.0419
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.66682 0.03525 -189.143 <2e-16 ***
## NO3_3moPreSamp 0.01893 0.03149 0.601 0.548
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1866.5 110.5 16.89 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3619 on 3 Df
## Pseudo R-squared: 0.001384
## Number of iterations: 704 (BFGS) + 4 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.7359016 -6.597734
## NO3_3moPreSamp -0.0427905 0.080653
## (phi) 1649.9126823 2083.145562
No association between nitrate and global DNAm in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ NO3_3moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ NO3_3moPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1357 -0.4020 -0.0571 0.2715 5.0451
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.6075956 0.2041558 -32.365 <2e-16 ***
## NO3_3moPreSamp 0.0182192 0.0315548 0.577 0.564
## sexFemale -0.0084106 0.0543709 -0.155 0.877
## age_dx -0.0004864 0.0028890 -0.168 0.866
## smokeHxEver -0.0367234 0.0488811 -0.751 0.452
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1868.5 110.6 16.89 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3619 on 6 Df
## Pseudo R-squared: 0.003666
## Number of iterations: 164 (BFGS) + 4 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -7.007734e+00 -6.207458e+00
## NO3_3moPreSamp -4.362708e-02 8.006540e-02
## sexFemale -1.149755e-01 9.815440e-02
## age_dx -6.148821e-03 5.176041e-03
## smokeHxEver -1.325286e-01 5.908184e-02
## (phi) 1.651622e+03 2.085287e+03
No association between nitrate and global DNAm in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ NO3_1moPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ NO3_1moPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0355 -0.4031 -0.0654 0.2738 4.9471
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.6369180 0.0367755 -180.471 <2e-16 ***
## NO3_1moPreSamp 0.0001452 0.0346114 0.004 0.997
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1787.7 107.6 16.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3499 on 3 Df
## Pseudo R-squared: 6.847e-08
## Number of iterations: 1906 (BFGS) + 2 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.70899672 -6.5648393
## NO3_1moPreSamp -0.06769194 0.0679824
## (phi) 1576.84017979 1998.4969771
No association between nitrate and global DNAm in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ NO3_1moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ NO3_1moPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0818 -0.4059 -0.0679 0.2752 4.9490
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.5784931 0.2096448 -31.379 <2e-16 ***
## NO3_1moPreSamp 0.0005675 0.0346469 0.016 0.987
## sexFemale -0.0009629 0.0552040 -0.017 0.986
## age_dx -0.0005114 0.0029732 -0.172 0.863
## smokeHxEver -0.0365340 0.0504934 -0.724 0.469
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1789.5 107.7 16.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3499 on 6 Df
## Pseudo R-squared: 0.002213
## Number of iterations: 125 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -6.989389e+00 -6.167597e+00
## NO3_1moPreSamp -6.733931e-02 6.847422e-02
## sexFemale -1.091607e-01 1.072349e-01
## age_dx -6.338706e-03 5.315905e-03
## smokeHxEver -1.354992e-01 6.243123e-02
## (phi) 1.578460e+03 2.000533e+03
No association between nitrate and global DNAm in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ NH4_5yrPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ NH4_5yrPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1488 -0.3940 -0.0575 0.2971 5.3882
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.78685 0.05145 -131.908 <2e-16 ***
## NH4_5yrPreSamp 0.17700 0.09601 1.844 0.0652 .
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2150.2 115.7 18.58 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4402 on 3 Df
## Pseudo R-squared: 0.01036
## Number of iterations: 3623 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.8876896 -6.6860046
## NH4_5yrPreSamp -0.0111714 0.3651737
## (phi) 1923.3501515 2377.0720027
Higher NH4 marginally associated with higher global DNAm in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ NH4_5yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ NH4_5yrPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1685 -0.4036 -0.0593 0.2770 5.3962
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.7127980 0.1929334 -34.793 <2e-16 ***
## NH4_5yrPreSamp 0.1715980 0.0963726 1.781 0.075 .
## sexFemale 0.0142767 0.0475909 0.300 0.764
## age_dx -0.0009185 0.0026048 -0.353 0.724
## smokeHxEver -0.0187735 0.0430170 -0.436 0.663
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2151.6 115.8 18.58 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4403 on 6 Df
## Pseudo R-squared: 0.01174
## Number of iterations: 150 (BFGS) + 2 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -7.090941e+00 -6.334655e+00
## NH4_5yrPreSamp -1.728887e-02 3.604849e-01
## sexFemale -7.899988e-02 1.075532e-01
## age_dx -6.023946e-03 4.186848e-03
## smokeHxEver -1.030853e-01 6.553827e-02
## (phi) 1.924606e+03 2.378614e+03
Higher NH4 marginally associated with higher global DNAm in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ NH4_1yrPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ NH4_1yrPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.2285 -0.3894 -0.0526 0.2861 5.3791
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.77129 0.04509 -150.18 <2e-16 ***
## NH4_1yrPreSamp 0.25222 0.13935 1.81 0.0703 .
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2140.0 115.5 18.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4377 on 3 Df
## Pseudo R-squared: 0.009959
## Number of iterations: 641 (BFGS) + 2 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.85965816 -6.6829198
## NH4_1yrPreSamp -0.02089795 0.5253479
## (phi) 1913.57989964 2366.4435174
Higher NH4 marginally associated with higher global DNAm in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ NH4_1yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ NH4_1yrPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.2094 -0.3837 -0.0633 0.2862 5.3896
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.687778 0.189614 -35.271 <2e-16 ***
## NH4_1yrPreSamp 0.246605 0.139606 1.766 0.0773 .
## sexFemale 0.015851 0.047791 0.332 0.7401
## age_dx -0.001085 0.002607 -0.416 0.6772
## smokeHxEver -0.017962 0.043140 -0.416 0.6771
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2141.6 115.6 18.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4377 on 6 Df
## Pseudo R-squared: 0.01164
## Number of iterations: 268 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -7.059414e+00 -6.316142e+00
## NH4_1yrPreSamp -2.701741e-02 5.202272e-01
## sexFemale -7.781759e-02 1.095198e-01
## age_dx -6.194239e-03 4.024194e-03
## smokeHxEver -1.025148e-01 6.659066e-02
## (phi) 1.914987e+03 2.368172e+03
Higher NH4 marginally associated with higher global DNAm in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ NH4_6moPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ NH4_6moPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0588 -0.3997 -0.0670 0.2814 5.3270
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.72360 0.03929 -171.111 <2e-16 ***
## NH4_6moPreSamp 0.06665 0.10977 0.607 0.544
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2110.9 116.8 18.07 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4168 on 3 Df
## Pseudo R-squared: 0.001321
## Number of iterations: 1439 (BFGS) + 4 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.8006102 -6.6465813
## NH4_6moPreSamp -0.1484991 0.2817968
## (phi) 1881.9369985 2339.9046836
Higher NH4 not associated with global DNAm in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ NH4_6moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ NH4_6moPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1161 -0.3919 -0.0731 0.2818 5.3425
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.611737 0.189560 -34.879 <2e-16 ***
## NH4_6moPreSamp 0.062263 0.110190 0.565 0.572
## sexFemale 0.003808 0.049437 0.077 0.939
## age_dx -0.001384 0.002649 -0.523 0.601
## smokeHxEver -0.026053 0.044430 -0.586 0.558
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2113.0 116.9 18.07 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4168 on 6 Df
## Pseudo R-squared: 0.00347
## Number of iterations: 345 (BFGS) + 4 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -6.983267e+00 -6.240207e+00
## NH4_6moPreSamp -1.537054e-01 2.782309e-01
## sexFemale -9.308685e-02 1.007023e-01
## age_dx -6.575586e-03 3.807315e-03
## smokeHxEver -1.131343e-01 6.102747e-02
## (phi) 1.883758e+03 2.342154e+03
Higher NH4 not associated with global DNAm in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ NH4_3moPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ NH4_3moPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0794 -0.4001 -0.0674 0.2759 5.0393
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.66813 0.03927 -169.800 <2e-16 ***
## NH4_3moPreSamp 0.05752 0.10835 0.531 0.596
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1866.3 110.5 16.89 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3619 on 3 Df
## Pseudo R-squared: 0.001082
## Number of iterations: 1019 (BFGS) + 2 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.745101 -6.5911634
## NH4_3moPreSamp -0.154847 0.2698831
## (phi) 1649.688564 2082.8648333
Higher NH4 not associated with global DNAm in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ NH4_3moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ NH4_3moPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1502 -0.4042 -0.0573 0.2652 5.0430
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.6084645 0.2051993 -32.205 <2e-16 ***
## NH4_3moPreSamp 0.0533942 0.1086784 0.491 0.623
## sexFemale -0.0088638 0.0543484 -0.163 0.870
## age_dx -0.0004866 0.0028885 -0.168 0.866
## smokeHxEver -0.0362977 0.0488935 -0.742 0.458
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1868.2 110.6 16.89 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3619 on 6 Df
## Pseudo R-squared: 0.003351
## Number of iterations: 190 (BFGS) + 4 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -7.010648e+00 -6.206281e+00
## NH4_3moPreSamp -1.596116e-01 2.664000e-01
## sexFemale -1.153848e-01 9.765716e-02
## age_dx -6.147989e-03 5.174696e-03
## smokeHxEver -1.321273e-01 5.953186e-02
## (phi) 1.651356e+03 2.084953e+03
Higher NH4 not associated with global DNAm in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ NH4_1moPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ NH4_1moPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0354 -0.4029 -0.0651 0.2740 4.9468
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.637143 0.039647 -167.408 <2e-16 ***
## NH4_1moPreSamp 0.001264 0.111966 0.011 0.991
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1787.7 107.6 16.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3499 on 3 Df
## Pseudo R-squared: 5.137e-07
## Number of iterations: 270 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.7148489 -6.5594372
## NH4_1moPreSamp -0.2181853 0.2207123
## (phi) 1576.8404943 1998.4973724
Higher NH4 not associated with global DNAm in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ NH4_1moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ NH4_1moPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0807 -0.4062 -0.0683 0.2747 4.9488
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.5777754 0.2101242 -31.304 <2e-16 ***
## NH4_1moPreSamp -0.0004349 0.1121467 -0.004 0.997
## sexFemale -0.0009723 0.0552015 -0.018 0.986
## age_dx -0.0005143 0.0029722 -0.173 0.863
## smokeHxEver -0.0365109 0.0504852 -0.723 0.470
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1789.5 107.7 16.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3499 on 6 Df
## Pseudo R-squared: 0.002212
## Number of iterations: 203 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -6.989611e+00 -6.165939e+00
## NH4_1moPreSamp -2.202384e-01 2.193686e-01
## sexFemale -1.091653e-01 1.072207e-01
## age_dx -6.339663e-03 5.311005e-03
## smokeHxEver -1.354600e-01 6.243819e-02
## (phi) 1.578459e+03 2.000532e+03
Higher NH4 not associated with global DNAm in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ BC_5yrPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ BC_5yrPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0762 -0.3996 -0.0655 0.2816 5.3468
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.73713 0.06805 -99.002 <2e-16 ***
## BC_5yrPreSamp 0.05666 0.10426 0.543 0.587
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2140.8 115.3 18.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4401 on 3 Df
## Pseudo R-squared: 0.0008914
## Number of iterations: 417 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.8705094 -6.6037560
## BC_5yrPreSamp -0.1476886 0.2610144
## (phi) 1914.8608419 2366.6480173
No significant association between BC_5yrPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ BC_5yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ BC_5yrPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1240 -0.4023 -0.0760 0.2732 5.3592
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.637310 0.194808 -34.071 <2e-16 ***
## BC_5yrPreSamp 0.059213 0.104343 0.567 0.570
## sexFemale 0.016935 0.047714 0.355 0.723
## age_dx -0.001340 0.002599 -0.516 0.606
## smokeHxEver -0.021330 0.043039 -0.496 0.620
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2142.9 115.4 18.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4401 on 6 Df
## Pseudo R-squared: 0.003019
## Number of iterations: 303 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -7.019126e+00 -6.255494e+00
## BC_5yrPreSamp -1.452962e-01 2.637227e-01
## sexFemale -7.658207e-02 1.104526e-01
## age_dx -6.433826e-03 3.753028e-03
## smokeHxEver -1.056842e-01 6.302484e-02
## (phi) 1.916831e+03 2.369067e+03
No significant association between BC_5yrPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ BC_1yrPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ BC_1yrPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0738 -0.3922 -0.0673 0.2754 5.3467
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.704222 0.073541 -91.16 <2e-16 ***
## BC_1yrPreSamp 0.004687 0.117743 0.04 0.968
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2130 115 18.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4375 on 3 Df
## Pseudo R-squared: 5.032e-06
## Number of iterations: 634 (BFGS) + 4 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.8483601 -6.5600841
## BC_1yrPreSamp -0.2260847 0.2354577
## (phi) 1904.6991624 2355.5331992
No significant association between BC_1yrPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ BC_1yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ BC_1yrPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1226 -0.3988 -0.0727 0.2705 5.3595
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.603723 0.196535 -33.601 <2e-16 ***
## BC_1yrPreSamp 0.005677 0.117746 0.048 0.962
## sexFemale 0.015400 0.047872 0.322 0.748
## age_dx -0.001330 0.002606 -0.510 0.610
## smokeHxEver -0.021433 0.043186 -0.496 0.620
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2132.2 115.1 18.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4376 on 6 Df
## Pseudo R-squared: 0.002069
## Number of iterations: 216 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -6.988925e+00 -6.218520e+00
## BC_1yrPreSamp -2.251022e-01 2.364556e-01
## sexFemale -7.842704e-02 1.092270e-01
## age_dx -6.438391e-03 3.778581e-03
## smokeHxEver -1.060762e-01 6.320951e-02
## (phi) 1.906566e+03 2.357827e+03
No significant association between BC_1yrPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ BC_6moPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ BC_6moPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0733 -0.3898 -0.0622 0.2730 5.3295
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.67222 0.06107 -109.247 <2e-16 ***
## BC_6moPreSamp -0.05189 0.09080 -0.571 0.568
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2110.8 116.8 18.07 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4168 on 3 Df
## Pseudo R-squared: 0.001181
## Number of iterations: 417 (BFGS) + 4 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.7919213 -6.5525135
## BC_6moPreSamp -0.2298576 0.1260793
## (phi) 1881.8241353 2339.7654183
No significant association between BC_6moPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ BC_6moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ BC_6moPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0892 -0.3864 -0.0644 0.2765 5.3422
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.568496 0.193328 -33.976 <2e-16 ***
## BC_6moPreSamp -0.049928 0.091005 -0.549 0.583
## sexFemale 0.002725 0.049446 0.055 0.956
## age_dx -0.001273 0.002652 -0.480 0.631
## smokeHxEver -0.028711 0.044361 -0.647 0.517
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2112.9 116.9 18.07 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4168 on 6 Df
## Pseudo R-squared: 0.003391
## Number of iterations: 242 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -6.94741302 -6.189580e+00
## BC_6moPreSamp -0.22829472 1.284384e-01
## sexFemale -0.09418784 9.963843e-02
## age_dx -0.00647144 3.925068e-03
## smokeHxEver -0.11565760 5.823464e-02
## (phi) 1883.72066410 2.342107e+03
No significant association between BC_6moPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ BC_3moPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ BC_3moPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1362 -0.3898 -0.0609 0.2757 5.0379
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.69549 0.06489 -103.181 <2e-16 ***
## BC_3moPreSamp 0.07272 0.10040 0.724 0.469
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1867.1 110.6 16.89 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3619 on 3 Df
## Pseudo R-squared: 0.002054
## Number of iterations: 1264 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.8226739 -6.5683068
## BC_3moPreSamp -0.1240575 0.2694969
## (phi) 1650.3998993 2083.7559858
No significant association between BC_3moPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ BC_3moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ BC_3moPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.2169 -0.3994 -0.0541 0.2852 5.0416
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.6333093 0.2109832 -31.440 <2e-16 ***
## BC_3moPreSamp 0.0747411 0.1004006 0.744 0.457
## sexFemale -0.0099368 0.0543158 -0.183 0.855
## age_dx -0.0005352 0.0028855 -0.185 0.853
## smokeHxEver -0.0382629 0.0488702 -0.783 0.434
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1869.2 110.7 16.89 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3619 on 6 Df
## Pseudo R-squared: 0.004468
## Number of iterations: 242 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -7.046829e+00 -6.219790e+00
## BC_3moPreSamp -1.220405e-01 2.715228e-01
## sexFemale -1.163939e-01 9.652030e-02
## age_dx -6.190719e-03 5.120365e-03
## smokeHxEver -1.340468e-01 5.752102e-02
## (phi) 1.652287e+03 2.086121e+03
No significant association between BC_3moPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ BC_1moPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ BC_1moPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0184 -0.4017 -0.0608 0.2672 4.9455
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.67446 0.06353 -105.061 <2e-16 ***
## BC_1moPreSamp 0.06410 0.09790 0.655 0.513
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1789.0 107.6 16.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3499 on 3 Df
## Pseudo R-squared: 0.001655
## Number of iterations: 1083 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.7989713 -6.5499406
## BC_1moPreSamp -0.1277916 0.2559862
## (phi) 1578.0419781 2000.0078659
No significant association between BC_1moPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ BC_1moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ BC_1moPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0378 -0.4050 -0.0573 0.2834 4.9465
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.6174022 0.2156676 -30.683 <2e-16 ***
## BC_1moPreSamp 0.0672957 0.0979361 0.687 0.492
## sexFemale -0.0002692 0.0551875 -0.005 0.996
## age_dx -0.0005044 0.0029693 -0.170 0.865
## smokeHxEver -0.0378730 0.0504746 -0.750 0.453
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1791.0 107.8 16.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3500 on 6 Df
## Pseudo R-squared: 0.003916
## Number of iterations: 196 (BFGS) + 4 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -7.040103e+00 -6.194701e+00
## BC_1moPreSamp -1.246556e-01 2.592469e-01
## sexFemale -1.084347e-01 1.078964e-01
## age_dx -6.324207e-03 5.315352e-03
## smokeHxEver -1.368014e-01 6.105546e-02
## (phi) 1.579796e+03 2.002213e+03
No significant association between BC_1moPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ OM_5yrPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ OM_5yrPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0751 -0.3923 -0.0665 0.2761 5.3599
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.705251 0.070833 -94.663 <2e-16 ***
## OM_5yrPreSamp 0.001038 0.023716 0.044 0.965
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2139.9 115.2 18.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4401 on 3 Df
## Pseudo R-squared: 5.956e-06
## Number of iterations: 620 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.84408095 -6.5664218
## OM_5yrPreSamp -0.04544473 0.0475205
## (phi) 1914.06415810 2365.6697541
No significant association between OM_5yrPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ OM_5yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ OM_5yrPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1242 -0.3995 -0.0772 0.2704 5.3726
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.605964 0.196330 -33.647 <2e-16 ***
## OM_5yrPreSamp 0.001672 0.023746 0.070 0.944
## sexFemale 0.015779 0.047752 0.330 0.741
## age_dx -0.001330 0.002600 -0.512 0.609
## smokeHxEver -0.021521 0.043042 -0.500 0.617
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2142.0 115.3 18.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4401 on 6 Df
## Pseudo R-squared: 0.0021
## Number of iterations: 112 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -6.990764e+00 -6.221163e+00
## OM_5yrPreSamp -4.486938e-02 4.821437e-02
## sexFemale -7.781207e-02 1.093708e-01
## age_dx -6.425473e-03 3.765746e-03
## smokeHxEver -1.058823e-01 6.284053e-02
## (phi) 1.915965e+03 2.368004e+03
No significant association between OM_5yrPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ OM_1yrPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ OM_1yrPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0732 -0.3830 -0.0651 0.2788 5.3650
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.66790 0.06916 -96.413 <2e-16 ***
## OM_1yrPreSamp -0.01111 0.02168 -0.513 0.608
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2130.9 115.1 18.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4376 on 3 Df
## Pseudo R-squared: 0.0008603
## Number of iterations: 88 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.80345291 -6.5323517
## OM_1yrPreSamp -0.05361151 0.0313841
## (phi) 1905.43479829 2356.4369761
No significant association between OM_1yrPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ OM_1yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ OM_1yrPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1018 -0.3876 -0.0690 0.2764 5.3784
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.567592 0.196053 -33.499 <2e-16 ***
## OM_1yrPreSamp -0.010790 0.021721 -0.497 0.619
## sexFemale 0.014044 0.047938 0.293 0.770
## age_dx -0.001325 0.002607 -0.508 0.611
## smokeHxEver -0.021765 0.043166 -0.504 0.614
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2133.0 115.2 18.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4376 on 6 Df
## Pseudo R-squared: 0.002871
## Number of iterations: 346 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -6.951848e+00 -6.183336e+00
## OM_1yrPreSamp -5.336236e-02 3.178289e-02
## sexFemale -7.991235e-02 1.080005e-01
## age_dx -6.433739e-03 3.784272e-03
## smokeHxEver -1.063690e-01 6.283806e-02
## (phi) 1.907254e+03 2.358672e+03
No significant association between OM_1yrPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ OM_6moPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ OM_6moPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0752 -0.3761 -0.0529 0.2772 5.3540
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.64928 0.05982 -111.2 <2e-16 ***
## OM_6moPreSamp -0.01760 0.01761 -1.0 0.318
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2113.0 116.9 18.07 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4168 on 3 Df
## Pseudo R-squared: 0.003534
## Number of iterations: 740 (BFGS) + 4 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.76652082 -6.53203180
## OM_6moPreSamp -0.05210588 0.01690988
## (phi) 1883.79643121 2342.20111613
No significant association between OM_6moPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ OM_6moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ OM_6moPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0727 -0.3803 -0.0619 0.2743 5.3683
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.543505 0.194078 -33.716 <2e-16 ***
## OM_6moPreSamp -0.017468 0.017669 -0.989 0.323
## sexFemale -0.000434 0.049536 -0.009 0.993
## age_dx -0.001274 0.002650 -0.481 0.631
## smokeHxEver -0.029316 0.044326 -0.661 0.508
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2115.1 117.1 18.07 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4169 on 6 Df
## Pseudo R-squared: 0.00567
## Number of iterations: 242 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -6.92389028 -6.163120e+00
## OM_6moPreSamp -0.05209857 1.716263e-02
## sexFemale -0.09752197 9.665405e-02
## age_dx -0.00646855 3.920306e-03
## smokeHxEver -0.11619313 5.756049e-02
## (phi) 1885.70778639 2.344562e+03
No significant association between OM_6moPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ OM_3moPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ OM_3moPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0648 -0.3936 -0.0608 0.2796 5.0418
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.6553839 0.0621073 -107.159 <2e-16 ***
## OM_3moPreSamp 0.0009425 0.0186360 0.051 0.96
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1865.4 110.5 16.89 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3619 on 3 Df
## Pseudo R-squared: 1.05e-05
## Number of iterations: 263 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.77711203 -6.53365578
## OM_3moPreSamp -0.03558341 0.03746848
## (phi) 1648.89342679 2081.86878297
No significant association between OM_3moPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ OM_3moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ OM_3moPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1402 -0.3956 -0.0626 0.2714 5.0461
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.592241 0.210304 -31.346 <2e-16 ***
## OM_3moPreSamp 0.001130 0.018654 0.061 0.952
## sexFemale -0.009525 0.054375 -0.175 0.861
## age_dx -0.000550 0.002887 -0.191 0.849
## smokeHxEver -0.037345 0.048899 -0.764 0.445
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1867.4 110.6 16.89 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3619 on 6 Df
## Pseudo R-squared: 0.002389
## Number of iterations: 164 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -7.004429e+00 -6.180054e+00
## OM_3moPreSamp -3.543155e-02 3.769071e-02
## sexFemale -1.160969e-01 9.704770e-02
## age_dx -6.208551e-03 5.108466e-03
## smokeHxEver -1.331857e-01 5.849539e-02
## (phi) 1.650690e+03 2.084119e+03
No significant association between OM_3moPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ OM_1moPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ OM_1moPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0359 -0.4023 -0.0649 0.2739 4.9475
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.6360060 0.0602069 -110.220 <2e-16 ***
## OM_1moPreSamp -0.0002649 0.0177554 -0.015 0.988
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1787.7 107.6 16.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3499 on 3 Df
## Pseudo R-squared: 9.544e-07
## Number of iterations: 270 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.75400939 -6.51800259
## OM_1moPreSamp -0.03506479 0.03453505
## (phi) 1576.84078287 1998.49773516
No significant association between OM_1moPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ OM_1moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ OM_1moPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0815 -0.4056 -0.0684 0.2754 4.9492
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.5786296 0.2144246 -30.680 <2e-16 ***
## OM_1moPreSamp 0.0002388 0.0177909 0.013 0.989
## sexFemale -0.0009271 0.0552865 -0.017 0.987
## age_dx -0.0005141 0.0029707 -0.173 0.863
## smokeHxEver -0.0365293 0.0505062 -0.723 0.470
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1789.5 107.7 16.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3499 on 6 Df
## Pseudo R-squared: 0.002213
## Number of iterations: 137 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -6.99889405 -6.158365e+00
## OM_1moPreSamp -0.03463082 3.510837e-02
## sexFemale -0.10928656 1.074324e-01
## age_dx -0.00633657 5.308324e-03
## smokeHxEver -0.13551959 6.246097e-02
## (phi) 1578.45965171 2.000533e+03
No significant association between OM_1moPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ SS_5yrPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ SS_5yrPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0720 -0.3899 -0.0578 0.2834 5.3641
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.69654 0.02985 -224.373 <2e-16 ***
## SS_5yrPreSamp -0.01611 0.05450 -0.296 0.768
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2140.1 115.2 18.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4401 on 3 Df
## Pseudo R-squared: 0.0002811
## Number of iterations: 1089 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.7550382 -6.63804567
## SS_5yrPreSamp -0.1229363 0.09071838
## (phi) 1914.3040988 2365.96438065
No significant association between SS_5yrPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ SS_5yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ SS_5yrPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1199 -0.4043 -0.0707 0.2717 5.3763
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.598356 0.183839 -35.892 <2e-16 ***
## SS_5yrPreSamp -0.013103 0.054635 -0.240 0.810
## sexFemale 0.015320 0.047691 0.321 0.748
## age_dx -0.001307 0.002604 -0.502 0.616
## smokeHxEver -0.020985 0.043096 -0.487 0.626
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2142.2 115.3 18.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4401 on 6 Df
## Pseudo R-squared: 0.002256
## Number of iterations: 242 (BFGS) + 4 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -6.958675e+00 -6.238037e+00
## SS_5yrPreSamp -1.201858e-01 9.397972e-02
## sexFemale -7.815213e-02 1.087927e-01
## age_dx -6.409553e-03 3.796226e-03
## smokeHxEver -1.054509e-01 6.348041e-02
## (phi) 1.916116e+03 2.368189e+03
No significant association between SS_5yrPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ SS_1yrPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ SS_1yrPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0718 -0.3909 -0.0585 0.2850 5.3497
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.697963 0.028850 -232.166 <2e-16 ***
## SS_1yrPreSamp -0.008935 0.045803 -0.195 0.845
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2130 115 18.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4375 on 3 Df
## Pseudo R-squared: 0.000129
## Number of iterations: 2391 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.75450808 -6.64141866
## SS_1yrPreSamp -0.09870806 0.08083783
## (phi) 1904.80359011 2355.66149111
No significant association between SS_1yrPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ SS_1yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ SS_1yrPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1197 -0.4005 -0.0724 0.2704 5.3623
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.598585 0.184376 -35.789 <2e-16 ***
## SS_1yrPreSamp -0.006818 0.045881 -0.149 0.882
## sexFemale 0.015233 0.047892 0.318 0.750
## age_dx -0.001319 0.002608 -0.506 0.613
## smokeHxEver -0.021110 0.043237 -0.488 0.625
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2132.3 115.1 18.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4376 on 6 Df
## Pseudo R-squared: 0.002132
## Number of iterations: 320 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -6.959956e+00 -6.237214e+00
## SS_1yrPreSamp -9.674419e-02 8.310787e-02
## sexFemale -7.863385e-02 1.090989e-01
## age_dx -6.430609e-03 3.791821e-03
## smokeHxEver -1.058518e-01 6.363260e-02
## (phi) 1.906624e+03 2.357898e+03
No significant association between SS_1yrPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ SS_6moPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ SS_6moPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1467 -0.3920 -0.0622 0.2813 5.3081
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.71625 0.02873 -233.751 <2e-16 ***
## SS_6moPreSamp 0.03040 0.04273 0.711 0.477
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2111.3 116.9 18.07 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4168 on 3 Df
## Pseudo R-squared: 0.001616
## Number of iterations: 3343 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.77256322 -6.6599338
## SS_6moPreSamp -0.05335769 0.1141513
## (phi) 1882.26208737 2340.3062579
No significant association between SS_6moPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ SS_6moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ SS_6moPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.2824 -0.3913 -0.0672 0.2802 5.3216
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.604437 0.187505 -35.223 <2e-16 ***
## SS_6moPreSamp 0.032491 0.042805 0.759 0.448
## sexFemale 0.004555 0.049435 0.092 0.927
## age_dx -0.001379 0.002647 -0.521 0.602
## smokeHxEver -0.030107 0.044404 -0.678 0.498
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2114 117 18.07 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4168 on 6 Df
## Pseudo R-squared: 0.004115
## Number of iterations: 221 (BFGS) + 2 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -6.971940e+00 -6.236934e+00
## SS_6moPreSamp -5.140560e-02 1.163868e-01
## sexFemale -9.233491e-02 1.014454e-01
## age_dx -6.567051e-03 3.809361e-03
## smokeHxEver -1.171359e-01 5.692263e-02
## (phi) 1.884436e+03 2.342990e+03
No significant association between SS_6moPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ SS_3moPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ SS_3moPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0509 -0.3861 -0.0626 0.2710 5.0370
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.65866 0.03094 -215.183 <2e-16 ***
## SS_3moPreSamp 0.01610 0.04427 0.364 0.716
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1865.8 110.5 16.89 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3619 on 3 Df
## Pseudo R-squared: 0.0004941
## Number of iterations: 1096 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.71930994 -6.5980108
## SS_3moPreSamp -0.07066867 0.1028688
## (phi) 1649.25840942 2082.3260149
No significant association between SS_3moPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ SS_3moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ SS_3moPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1411 -0.3960 -0.0554 0.2635 5.0402
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.5932796 0.2021169 -32.621 <2e-16 ***
## SS_3moPreSamp 0.0187923 0.0443956 0.423 0.672
## sexFemale -0.0095203 0.0543244 -0.175 0.861
## age_dx -0.0005766 0.0028861 -0.200 0.842
## smokeHxEver -0.0387379 0.0490000 -0.791 0.429
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1868.0 110.6 16.89 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3619 on 6 Df
## Pseudo R-squared: 0.003035
## Number of iterations: 183 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -6.989421e+00 -6.197138e+00
## SS_3moPreSamp -6.822136e-02 1.058061e-01
## sexFemale -1.159941e-01 9.695354e-02
## age_dx -6.233257e-03 5.080085e-03
## smokeHxEver -1.347763e-01 5.730040e-02
## (phi) 1.651183e+03 2.084737e+03
No significant association between SS_3moPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ SS_1moPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ SS_1moPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0353 -0.4032 -0.0662 0.2776 4.9484
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.635967 0.031196 -212.721 <2e-16 ***
## SS_1moPreSamp -0.002166 0.041731 -0.052 0.959
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1787.7 107.6 16.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3499 on 3 Df
## Pseudo R-squared: 1.061e-05
## Number of iterations: 718 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.69710948 -6.57482463
## SS_1moPreSamp -0.08395841 0.07962586
## (phi) 1576.84776623 1998.50651478
No significant association between SS_1moPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ SS_1moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ SS_1moPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0840 -0.4058 -0.0683 0.2750 4.9495
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.578e+00 2.072e-01 -31.745 <2e-16 ***
## SS_1moPreSamp 1.913e-05 4.184e-02 0.000 1.000
## sexFemale -9.694e-04 5.521e-02 -0.018 0.986
## age_dx -5.139e-04 2.971e-03 -0.173 0.863
## smokeHxEver -3.651e-02 5.058e-02 -0.722 0.470
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1789.5 107.7 16.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3499 on 6 Df
## Pseudo R-squared: 0.002212
## Number of iterations: 163 (BFGS) + 2 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -6.984053e+00 -6.171799e+00
## SS_1moPreSamp -8.197974e-02 8.201800e-02
## sexFemale -1.091700e-01 1.072311e-01
## age_dx -6.337487e-03 5.309645e-03
## smokeHxEver -1.356441e-01 6.262374e-02
## (phi) 1.578459e+03 2.000532e+03
No significant association between SS_1moPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ Soil_5yrPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ Soil_5yrPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0549 -0.3867 -0.0573 0.2892 5.3688
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.68287 0.04009 -166.679 <2e-16 ***
## Soil_5yrPreSamp -0.03231 0.05522 -0.585 0.559
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2140.9 115.3 18.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4401 on 3 Df
## Pseudo R-squared: 0.001072
## Number of iterations: 879 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.7614568 -6.60429057
## Soil_5yrPreSamp -0.1405345 0.07592242
## (phi) 1915.0092460 2366.83027988
No significant association between Soil_5yrPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ Soil_5yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ Soil_5yrPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1199 -0.3929 -0.0731 0.2755 5.3806
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.588855 0.185056 -35.605 <2e-16 ***
## Soil_5yrPreSamp -0.029329 0.055365 -0.530 0.596
## sexFemale 0.015518 0.047661 0.326 0.745
## age_dx -0.001264 0.002604 -0.485 0.627
## smokeHxEver -0.020285 0.043115 -0.470 0.638
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2142.8 115.4 18.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4401 on 6 Df
## Pseudo R-squared: 0.002947
## Number of iterations: 255 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -6.951558e+00 -6.226153e+00
## Soil_5yrPreSamp -1.378416e-01 7.918383e-02
## sexFemale -7.789655e-02 1.089322e-01
## age_dx -6.366476e-03 3.839217e-03
## smokeHxEver -1.047890e-01 6.421995e-02
## (phi) 1.916735e+03 2.368949e+03
No significant association between Soil_5yrPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ Soil_1yrPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ Soil_1yrPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0661 -0.3868 -0.0650 0.2811 5.3484
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.696799 0.041699 -160.599 <2e-16 ***
## Soil_1yrPreSamp -0.007922 0.059754 -0.133 0.895
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2130 115 18.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4375 on 3 Df
## Pseudo R-squared: 5.759e-05
## Number of iterations: 578 (BFGS) + 5 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.7785277 -6.6150711
## Soil_1yrPreSamp -0.1250372 0.1091924
## (phi) 1904.7442495 2355.5885911
No significant association between Soil_1yrPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ Soil_1yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ Soil_1yrPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1157 -0.3983 -0.0676 0.2707 5.3610
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.597476 0.186468 -35.381 <2e-16 ***
## Soil_1yrPreSamp -0.006170 0.059746 -0.103 0.918
## sexFemale 0.015352 0.047873 0.321 0.748
## age_dx -0.001320 0.002607 -0.506 0.613
## smokeHxEver -0.021368 0.043188 -0.495 0.621
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2132.2 115.1 18.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4376 on 6 Df
## Pseudo R-squared: 0.0021
## Number of iterations: 294 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -6.962946e+00 -6.232005e+00
## Soil_1yrPreSamp -1.232706e-01 1.109301e-01
## sexFemale -7.847653e-02 1.091805e-01
## age_dx -6.429905e-03 3.790734e-03
## smokeHxEver -1.060151e-01 6.327939e-02
## (phi) 1.906590e+03 2.357856e+03
No significant association between Soil_1yrPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ Soil_6moPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ Soil_6moPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0559 -0.3802 -0.0641 0.2780 5.3166
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.701518 0.038543 -173.872 <2e-16 ***
## Soil_6moPreSamp -0.005203 0.053030 -0.098 0.922
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2109.7 116.8 18.07 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4168 on 3 Df
## Pseudo R-squared: 3.52e-05
## Number of iterations: 1019 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.7770611 -6.62597566
## Soil_6moPreSamp -0.1091401 0.09873448
## (phi) 1880.8874972 2338.60883090
No significant association between Soil_6moPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ Soil_6moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ Soil_6moPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1220 -0.3952 -0.0687 0.2738 5.3309
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.591913 0.189159 -34.849 <2e-16 ***
## Soil_6moPreSamp -0.003896 0.053067 -0.073 0.941
## sexFemale 0.003649 0.049457 0.074 0.941
## age_dx -0.001361 0.002649 -0.514 0.607
## smokeHxEver -0.028146 0.044359 -0.634 0.526
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 2111.9 116.9 18.07 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 4168 on 6 Df
## Pseudo R-squared: 0.002283
## Number of iterations: 196 (BFGS) + 2 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -6.962658e+00 -6.221168e+00
## Soil_6moPreSamp -1.079061e-01 1.001131e-01
## sexFemale -9.328536e-02 1.005829e-01
## age_dx -6.552341e-03 3.830855e-03
## smokeHxEver -1.150878e-01 5.879679e-02
## (phi) 1.882855e+03 2.341039e+03
No significant association between Soil_6moPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ Soil_3moPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ Soil_3moPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0520 -0.3909 -0.0637 0.2718 5.0391
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.66071 0.03806 -175.008 <2e-16 ***
## Soil_3moPreSamp 0.01380 0.04695 0.294 0.769
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1865.7 110.5 16.89 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3619 on 3 Df
## Pseudo R-squared: 0.0003832
## Number of iterations: 1145 (BFGS) + 2 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.7353016 -6.5861115
## Soil_3moPreSamp -0.0782205 0.1058194
## (phi) 1649.1480175 2082.1876444
No significant association between Soil_3moPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ Soil_3moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ Soil_3moPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.1258 -0.3985 -0.0612 0.2637 5.0427
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.5988793 0.2045835 -32.255 <2e-16 ***
## Soil_3moPreSamp 0.0143826 0.0470619 0.306 0.760
## sexFemale -0.0088614 0.0544092 -0.163 0.871
## age_dx -0.0005267 0.0028875 -0.182 0.855
## smokeHxEver -0.0376970 0.0489020 -0.771 0.441
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1867.7 110.6 16.89 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3619 on 6 Df
## Pseudo R-squared: 0.002781
## Number of iterations: 274 (BFGS) + 5 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -6.999856e+00 -6.197903e+00
## Soil_3moPreSamp -7.785708e-02 1.066223e-01
## sexFemale -1.155015e-01 9.777870e-02
## age_dx -6.186157e-03 5.132669e-03
## smokeHxEver -1.335433e-01 5.814920e-02
## (phi) 1.650963e+03 2.084461e+03
No significant association between Soil_3moPreSamp and prop_5mC in this model.
Unadjusted model
dnam_model1 <- betareg(prop_5mC ~ Soil_1moPreSamp, data=dnam)
summary(dnam_model1)
##
## Call:
## betareg(formula = prop_5mC ~ Soil_1moPreSamp, data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0515 -0.3998 -0.0641 0.2723 4.9462
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.64390 0.03768 -176.345 <2e-16 ***
## Soil_1moPreSamp 0.01192 0.04470 0.267 0.79
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1787.9 107.6 16.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3499 on 3 Df
## Pseudo R-squared: 0.0003353
## Number of iterations: 1418 (BFGS) + 2 (Fisher scoring)
confint(dnam_model1)
## 2.5 % 97.5 %
## (Intercept) -6.71774017 -6.57005506
## Soil_1moPreSamp -0.07569086 0.09952216
## (phi) 1577.05602332 1998.76822882
No significant association between Soil_1moPreSamp and prop_5mC in this model.
Adjusted model with age_dx, sex, smokeHx
dnam_model2 <- betareg(prop_5mC ~ Soil_1moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
##
## Call:
## betareg(formula = prop_5mC ~ Soil_1moPreSamp + sex + age_dx + smokeHx,
## data = dnam)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.0707 -0.4040 -0.0658 0.2791 4.9473
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.587e+00 2.096e-01 -31.423 <2e-16 ***
## Soil_1moPreSamp 1.254e-02 4.485e-02 0.280 0.780
## sexFemale 8.390e-06 5.531e-02 0.000 1.000
## age_dx -4.849e-04 2.972e-03 -0.163 0.870
## smokeHxEver -3.677e-02 5.050e-02 -0.728 0.466
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 1789.8 107.7 16.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 3500 on 6 Df
## Pseudo R-squared: 0.002581
## Number of iterations: 176 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
## 2.5 % 97.5 %
## (Intercept) -6.998324e+00 -6.176552e+00
## Soil_1moPreSamp -7.536798e-02 1.004392e-01
## sexFemale -1.084026e-01 1.084194e-01
## age_dx -6.309719e-03 5.339838e-03
## smokeHxEver -1.357450e-01 6.220024e-02
## (phi) 1.578696e+03 2.000829e+03
No significant association between Soil_1moPreSamp and prop_5mC in this model.
Base model
coxPH_model1 <- coxph(Surv(time_DeathTxCensor, deadORtx==1) ~ pct_5mC, data=dnam, id=ID)
summary(coxPH_model1)
## Call:
## coxph(formula = Surv(time_DeathTxCensor, deadORtx == 1) ~ pct_5mC,
## data = dnam, id = ID)
##
## n= 746, number of events= 221
##
## coef exp(coef) se(coef) z Pr(>|z|)
## pct_5mC -0.1470 0.8633 0.4149 -0.354 0.723
##
## exp(coef) exp(-coef) lower .95 upper .95
## pct_5mC 0.8633 1.158 0.3828 1.947
##
## Concordance= 0.563 (se = 0.022 )
## Likelihood ratio test= 0.14 on 1 df, p=0.7
## Wald test = 0.13 on 1 df, p=0.7
## Score (logrank) test = 0.13 on 1 df, p=0.7
No significant association between %5mC and mortality in this model.
Partial Model
coxPH_model2 <- coxph(Surv(time_DeathTxCensor, deadORtx==1) ~ pct_5mC + age_dx + sex + smokeHx, data=dnam, id=ID)
summary(coxPH_model2)
## Call:
## coxph(formula = Surv(time_DeathTxCensor, deadORtx == 1) ~ pct_5mC +
## age_dx + sex + smokeHx, data = dnam, id = ID)
##
## n= 746, number of events= 221
##
## coef exp(coef) se(coef) z Pr(>|z|)
## pct_5mC -0.179632 0.835578 0.387670 -0.463 0.64310
## age_dx 0.024722 1.025030 0.009071 2.725 0.00642 **
## sexFemale -0.280008 0.755778 0.167263 -1.674 0.09412 .
## smokeHxEver 0.428896 1.535562 0.150184 2.856 0.00429 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## pct_5mC 0.8356 1.1968 0.3908 1.786
## age_dx 1.0250 0.9756 1.0070 1.043
## sexFemale 0.7558 1.3231 0.5445 1.049
## smokeHxEver 1.5356 0.6512 1.1440 2.061
##
## Concordance= 0.599 (se = 0.021 )
## Likelihood ratio test= 21.2 on 4 df, p=3e-04
## Wald test = 20.02 on 4 df, p=5e-04
## Score (logrank) test = 20.17 on 4 df, p=5e-04
No significant association between %5mC and mortality in this model.
Complete Model
coxPH_model3 <- coxph(Surv(time_DeathTxCensor, deadORtx==1) ~ pct_5mC + age_dx + sex + smokeHx + dich_Race + fvc_pct + dlco_pct, data=dnam, id=ID)
summary(coxPH_model3)
## Call:
## coxph(formula = Surv(time_DeathTxCensor, deadORtx == 1) ~ pct_5mC +
## age_dx + sex + smokeHx + dich_Race + fvc_pct + dlco_pct,
## data = dnam, id = ID)
##
## n= 637, number of events= 182
## (109 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## pct_5mC 0.283617 1.327924 0.374374 0.758 0.4487
## age_dx 0.017992 1.018155 0.010130 1.776 0.0757 .
## sexFemale -0.242890 0.784358 0.187603 -1.295 0.1954
## smokeHxEver 0.187012 1.205641 0.169357 1.104 0.2695
## dich_RaceNon-White -0.039410 0.961356 0.330653 -0.119 0.9051
## fvc_pct -0.010389 0.989665 0.005351 -1.941 0.0522 .
## dlco_pct -0.047091 0.954001 0.006616 -7.118 1.1e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## pct_5mC 1.3279 0.7531 0.6375 2.7659
## age_dx 1.0182 0.9822 0.9981 1.0386
## sexFemale 0.7844 1.2749 0.5430 1.1329
## smokeHxEver 1.2056 0.8294 0.8651 1.6803
## dich_RaceNon-White 0.9614 1.0402 0.5028 1.8380
## fvc_pct 0.9897 1.0104 0.9793 1.0001
## dlco_pct 0.9540 1.0482 0.9417 0.9665
##
## Concordance= 0.728 (se = 0.019 )
## Likelihood ratio test= 98.07 on 7 df, p=<2e-16
## Wald test = 90.24 on 7 df, p=<2e-16
## Score (logrank) test = 87.62 on 7 df, p=4e-16
No significant association between %5mC and mortality in this model.
Base Model
FVC_model1 <- lm(fvc_pct ~ pct_5mC, data=dnam)
summary(FVC_model1)
##
## Call:
## lm(formula = fvc_pct ~ pct_5mC, data = dnam)
##
## Residuals:
## Min 1Q Median 3Q Max
## -44.491 -12.626 -0.945 11.722 52.299
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 68.2899 0.7317 93.327 <2e-16 ***
## pct_5mC -1.9395 2.9459 -0.658 0.511
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.62 on 676 degrees of freedom
## (68 observations deleted due to missingness)
## Multiple R-squared: 0.0006408, Adjusted R-squared: -0.0008375
## F-statistic: 0.4335 on 1 and 676 DF, p-value: 0.5105
confint(FVC_model1)
## 2.5 % 97.5 %
## (Intercept) 66.85321 69.726671
## pct_5mC -7.72373 3.844698
No significant association between %5mC and baseline FVC.
Partial Model
FVC_model2 <- lm(fvc_pct ~ pct_5mC + sex + age_dx, data=dnam)
summary(FVC_model2)
##
## Call:
## lm(formula = fvc_pct ~ pct_5mC + sex + age_dx, data = dnam)
##
## Residuals:
## Min 1Q Median 3Q Max
## -41.727 -11.494 -0.959 11.328 50.134
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 43.11370 5.53407 7.791 2.53e-14 ***
## pct_5mC -1.35251 2.90813 -0.465 0.642
## sexFemale 1.79770 1.45654 1.234 0.218
## age_dx 0.35917 0.07903 4.545 6.52e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.39 on 674 degrees of freedom
## (68 observations deleted due to missingness)
## Multiple R-squared: 0.03124, Adjusted R-squared: 0.02693
## F-statistic: 7.245 on 3 and 674 DF, p-value: 8.664e-05
confint(FVC_model2)
## 2.5 % 97.5 %
## (Intercept) 32.247610 53.9797940
## pct_5mC -7.062587 4.3575710
## sexFemale -1.062205 4.6576114
## age_dx 0.203992 0.5143502
No significant association between %5mC and baseline FVC.
Complete Model
FVC_model3 <- lm(fvc_pct ~ pct_5mC + sex + age_dx + dich_Race + smokeHx, data=dnam)
summary(FVC_model3)
##
## Call:
## lm(formula = fvc_pct ~ pct_5mC + sex + age_dx + dich_Race + smokeHx,
## data = dnam)
##
## Residuals:
## Min 1Q Median 3Q Max
## -41.127 -11.729 -0.653 11.433 49.479
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 42.12376 5.59302 7.531 1.62e-13 ***
## pct_5mC -1.42849 2.90506 -0.492 0.623
## sexFemale 1.98176 1.46636 1.351 0.177
## age_dx 0.36560 0.07905 4.625 4.50e-06 ***
## dich_RaceNon-White -4.67740 2.84535 -1.644 0.101
## smokeHxEver 1.19281 1.31367 0.908 0.364
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.37 on 672 degrees of freedom
## (68 observations deleted due to missingness)
## Multiple R-squared: 0.03636, Adjusted R-squared: 0.02919
## F-statistic: 5.072 on 5 and 672 DF, p-value: 0.0001427
confint(FVC_model3)
## 2.5 % 97.5 %
## (Intercept) 31.1418685 53.1056610
## pct_5mC -7.1325846 4.2755963
## sexFemale -0.8974351 4.8609471
## age_dx 0.2103857 0.5208138
## dich_RaceNon-White -10.2642367 0.9094403
## smokeHxEver -1.3865802 3.7721931
No significant association between %5mC and baseline FVC.
Base Model
DLCO_model1 <- lm(dlco_pct ~ pct_5mC, data=dnam)
summary(DLCO_model1)
##
## Call:
## lm(formula = dlco_pct ~ pct_5mC, data = dnam)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.180 -10.587 -1.157 8.675 128.215
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 40.5807 0.6932 58.544 <2e-16 ***
## pct_5mC 1.2988 2.7239 0.477 0.634
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.35 on 636 degrees of freedom
## (108 observations deleted due to missingness)
## Multiple R-squared: 0.0003573, Adjusted R-squared: -0.001214
## F-statistic: 0.2273 on 1 and 636 DF, p-value: 0.6337
confint(DLCO_model1)
## 2.5 % 97.5 %
## (Intercept) 39.219564 41.941931
## pct_5mC -4.050202 6.647734
No significant association between %5mC and baseline DLCO
Partial Model
DLCO_model2 <- lm(dlco_pct ~ pct_5mC + sex + age_dx, data=dnam)
summary(DLCO_model2)
##
## Call:
## lm(formula = dlco_pct ~ pct_5mC + sex + age_dx, data = dnam)
##
## Residuals:
## Min 1Q Median 3Q Max
## -31.675 -10.335 -1.376 8.650 128.622
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 43.98484 5.36943 8.192 1.42e-15 ***
## pct_5mC 1.30348 2.72686 0.478 0.633
## sexFemale 1.62893 1.39816 1.165 0.244
## age_dx -0.05576 0.07672 -0.727 0.468
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.34 on 634 degrees of freedom
## (108 observations deleted due to missingness)
## Multiple R-squared: 0.003633, Adjusted R-squared: -0.001081
## F-statistic: 0.7707 on 3 and 634 DF, p-value: 0.5107
confint(DLCO_model2)
## 2.5 % 97.5 %
## (Intercept) 33.4408166 54.52886557
## pct_5mC -4.0512810 6.65824801
## sexFemale -1.1166552 4.37450659
## age_dx -0.2064242 0.09490435
No significant association between %5mC and baseline DLCO
Complete Model
DLCO_model3 <- lm(dlco_pct ~ pct_5mC + sex + age_dx + dich_Race + smokeHx, data=dnam)
summary(DLCO_model3)
##
## Call:
## lm(formula = dlco_pct ~ pct_5mC + sex + age_dx + dich_Race +
## smokeHx, data = dnam)
##
## Residuals:
## Min 1Q Median 3Q Max
## -31.586 -9.884 -0.975 7.828 128.148
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 47.01113 5.38189 8.735 < 2e-16 ***
## pct_5mC 1.39641 2.70181 0.517 0.605449
## sexFemale 0.95389 1.39781 0.682 0.495224
## age_dx -0.05614 0.07611 -0.738 0.461048
## dich_RaceNon-White 2.05384 2.72268 0.754 0.450923
## smokeHxEver -4.59330 1.26719 -3.625 0.000312 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.2 on 632 degrees of freedom
## (108 observations deleted due to missingness)
## Multiple R-squared: 0.02509, Adjusted R-squared: 0.01737
## F-statistic: 3.253 on 5 and 632 DF, p-value: 0.006578
confint(DLCO_model3)
## 2.5 % 97.5 %
## (Intercept) 36.4425873 57.57968163
## pct_5mC -3.9092120 6.70202324
## sexFemale -1.7910254 3.69881159
## age_dx -0.2055931 0.09332124
## dich_RaceNon-White -3.2927491 7.40042565
## smokeHxEver -7.0817190 -2.10488804
No significant association between %5mC and baseline DLCO.